Use 5m instead of 1m range

Signed-off-by: Michel Hollands <michel.hollands@gmail.com>
This commit is contained in:
Michel Hollands 2024-04-03 10:30:10 +01:00
parent 6eac38d4ec
commit 17b52d572a
3 changed files with 205 additions and 221 deletions

View File

@@ -1,53 +1,53 @@
groups: groups:
- name: "loki_rules" - name: "loki_rules"
rules: rules:
- expr: "histogram_quantile(0.99, sum(rate(loki_request_duration_seconds_bucket[1m])) - expr: "histogram_quantile(0.99, sum(rate(loki_request_duration_seconds_bucket[5m]))
by (le, cluster, job))" by (le, cluster, job))"
record: "cluster_job:loki_request_duration_seconds:99quantile" record: "cluster_job:loki_request_duration_seconds:99quantile"
- expr: "histogram_quantile(0.50, sum(rate(loki_request_duration_seconds_bucket[1m])) - expr: "histogram_quantile(0.50, sum(rate(loki_request_duration_seconds_bucket[5m]))
by (le, cluster, job))" by (le, cluster, job))"
record: "cluster_job:loki_request_duration_seconds:50quantile" record: "cluster_job:loki_request_duration_seconds:50quantile"
- expr: "sum(rate(loki_request_duration_seconds_sum[1m])) by (cluster, job) / sum(rate(loki_request_duration_seconds_count[1m])) - expr: "sum(rate(loki_request_duration_seconds_sum[5m])) by (cluster, job) / sum(rate(loki_request_duration_seconds_count[5m]))
by (cluster, job)" by (cluster, job)"
record: "cluster_job:loki_request_duration_seconds:avg" record: "cluster_job:loki_request_duration_seconds:avg"
- expr: "sum(rate(loki_request_duration_seconds_bucket[1m])) by (le, cluster, job)" - expr: "sum(rate(loki_request_duration_seconds_bucket[5m])) by (le, cluster, job)"
record: "cluster_job:loki_request_duration_seconds_bucket:sum_rate" record: "cluster_job:loki_request_duration_seconds_bucket:sum_rate"
- expr: "sum(rate(loki_request_duration_seconds_sum[1m])) by (cluster, job)" - expr: "sum(rate(loki_request_duration_seconds_sum[5m])) by (cluster, job)"
record: "cluster_job:loki_request_duration_seconds_sum:sum_rate" record: "cluster_job:loki_request_duration_seconds_sum:sum_rate"
- expr: "sum(rate(loki_request_duration_seconds_count[1m])) by (cluster, job)" - expr: "sum(rate(loki_request_duration_seconds_count[5m])) by (cluster, job)"
record: "cluster_job:loki_request_duration_seconds_count:sum_rate" record: "cluster_job:loki_request_duration_seconds_count:sum_rate"
- expr: "histogram_quantile(0.99, sum(rate(loki_request_duration_seconds_bucket[1m])) - expr: "histogram_quantile(0.99, sum(rate(loki_request_duration_seconds_bucket[5m]))
by (le, cluster, job, route))" by (le, cluster, job, route))"
record: "cluster_job_route:loki_request_duration_seconds:99quantile" record: "cluster_job_route:loki_request_duration_seconds:99quantile"
- expr: "histogram_quantile(0.50, sum(rate(loki_request_duration_seconds_bucket[1m])) - expr: "histogram_quantile(0.50, sum(rate(loki_request_duration_seconds_bucket[5m]))
by (le, cluster, job, route))" by (le, cluster, job, route))"
record: "cluster_job_route:loki_request_duration_seconds:50quantile" record: "cluster_job_route:loki_request_duration_seconds:50quantile"
- expr: "sum(rate(loki_request_duration_seconds_sum[1m])) by (cluster, job, route) - expr: "sum(rate(loki_request_duration_seconds_sum[5m])) by (cluster, job, route)
/ sum(rate(loki_request_duration_seconds_count[1m])) by (cluster, job, route)" / sum(rate(loki_request_duration_seconds_count[5m])) by (cluster, job, route)"
record: "cluster_job_route:loki_request_duration_seconds:avg" record: "cluster_job_route:loki_request_duration_seconds:avg"
- expr: "sum(rate(loki_request_duration_seconds_bucket[1m])) by (le, cluster, job, - expr: "sum(rate(loki_request_duration_seconds_bucket[5m])) by (le, cluster, job,
route)" route)"
record: "cluster_job_route:loki_request_duration_seconds_bucket:sum_rate" record: "cluster_job_route:loki_request_duration_seconds_bucket:sum_rate"
- expr: "sum(rate(loki_request_duration_seconds_sum[1m])) by (cluster, job, route)" - expr: "sum(rate(loki_request_duration_seconds_sum[5m])) by (cluster, job, route)"
record: "cluster_job_route:loki_request_duration_seconds_sum:sum_rate" record: "cluster_job_route:loki_request_duration_seconds_sum:sum_rate"
- expr: "sum(rate(loki_request_duration_seconds_count[1m])) by (cluster, job, route)" - expr: "sum(rate(loki_request_duration_seconds_count[5m])) by (cluster, job, route)"
record: "cluster_job_route:loki_request_duration_seconds_count:sum_rate" record: "cluster_job_route:loki_request_duration_seconds_count:sum_rate"
- expr: "histogram_quantile(0.99, sum(rate(loki_request_duration_seconds_bucket[1m])) - expr: "histogram_quantile(0.99, sum(rate(loki_request_duration_seconds_bucket[5m]))
by (le, cluster, namespace, job, route))" by (le, cluster, namespace, job, route))"
record: "cluster_namespace_job_route:loki_request_duration_seconds:99quantile" record: "cluster_namespace_job_route:loki_request_duration_seconds:99quantile"
- expr: "histogram_quantile(0.50, sum(rate(loki_request_duration_seconds_bucket[1m])) - expr: "histogram_quantile(0.50, sum(rate(loki_request_duration_seconds_bucket[5m]))
by (le, cluster, namespace, job, route))" by (le, cluster, namespace, job, route))"
record: "cluster_namespace_job_route:loki_request_duration_seconds:50quantile" record: "cluster_namespace_job_route:loki_request_duration_seconds:50quantile"
- expr: "sum(rate(loki_request_duration_seconds_sum[1m])) by (cluster, namespace, - expr: "sum(rate(loki_request_duration_seconds_sum[5m])) by (cluster, namespace,
job, route) / sum(rate(loki_request_duration_seconds_count[1m])) by (cluster, job, route) / sum(rate(loki_request_duration_seconds_count[5m])) by (cluster,
namespace, job, route)" namespace, job, route)"
record: "cluster_namespace_job_route:loki_request_duration_seconds:avg" record: "cluster_namespace_job_route:loki_request_duration_seconds:avg"
- expr: "sum(rate(loki_request_duration_seconds_bucket[1m])) by (le, cluster, namespace, - expr: "sum(rate(loki_request_duration_seconds_bucket[5m])) by (le, cluster, namespace,
job, route)" job, route)"
record: "cluster_namespace_job_route:loki_request_duration_seconds_bucket:sum_rate" record: "cluster_namespace_job_route:loki_request_duration_seconds_bucket:sum_rate"
- expr: "sum(rate(loki_request_duration_seconds_sum[1m])) by (cluster, namespace, - expr: "sum(rate(loki_request_duration_seconds_sum[5m])) by (cluster, namespace,
job, route)" job, route)"
record: "cluster_namespace_job_route:loki_request_duration_seconds_sum:sum_rate" record: "cluster_namespace_job_route:loki_request_duration_seconds_sum:sum_rate"
- expr: "sum(rate(loki_request_duration_seconds_count[1m])) by (cluster, namespace, - expr: "sum(rate(loki_request_duration_seconds_count[5m])) by (cluster, namespace,
job, route)" job, route)"
record: "cluster_namespace_job_route:loki_request_duration_seconds_count:sum_rate" record: "cluster_namespace_job_route:loki_request_duration_seconds_count:sum_rate"

View File

@@ -1,322 +1,317 @@
groups: groups:
- name: "mimir_api_1" - name: "mimir_api_1"
rules: rules:
- expr: "histogram_quantile(0.99, sum(rate(cortex_request_duration_seconds_bucket[1m])) - expr: "histogram_quantile(0.99, sum(rate(cortex_request_duration_seconds_bucket[5m]))
by (le, cluster, job))" by (le, cluster, job))"
record: "cluster_job:cortex_request_duration_seconds:99quantile" record: "cluster_job:cortex_request_duration_seconds:99quantile"
- expr: "histogram_quantile(0.50, sum(rate(cortex_request_duration_seconds_bucket[1m])) - expr: "histogram_quantile(0.50, sum(rate(cortex_request_duration_seconds_bucket[5m]))
by (le, cluster, job))" by (le, cluster, job))"
record: "cluster_job:cortex_request_duration_seconds:50quantile" record: "cluster_job:cortex_request_duration_seconds:50quantile"
- expr: "sum(rate(cortex_request_duration_seconds_sum[1m])) by (cluster, job) / sum(rate(cortex_request_duration_seconds_count[1m])) - expr: "sum(rate(cortex_request_duration_seconds_sum[5m])) by (cluster, job) / sum(rate(cortex_request_duration_seconds_count[5m]))
by (cluster, job)" by (cluster, job)"
record: "cluster_job:cortex_request_duration_seconds:avg" record: "cluster_job:cortex_request_duration_seconds:avg"
- expr: "sum(rate(cortex_request_duration_seconds_bucket[1m])) by (le, cluster, job)" - expr: "sum(rate(cortex_request_duration_seconds_bucket[5m])) by (le, cluster, job)"
record: "cluster_job:cortex_request_duration_seconds_bucket:sum_rate" record: "cluster_job:cortex_request_duration_seconds_bucket:sum_rate"
- expr: "sum(rate(cortex_request_duration_seconds_sum[1m])) by (cluster, job)" - expr: "sum(rate(cortex_request_duration_seconds_sum[5m])) by (cluster, job)"
record: "cluster_job:cortex_request_duration_seconds_sum:sum_rate" record: "cluster_job:cortex_request_duration_seconds_sum:sum_rate"
- expr: "sum(rate(cortex_request_duration_seconds_count[1m])) by (cluster, job)" - expr: "sum(rate(cortex_request_duration_seconds_count[5m])) by (cluster, job)"
record: "cluster_job:cortex_request_duration_seconds_count:sum_rate" record: "cluster_job:cortex_request_duration_seconds_count:sum_rate"
- name: "mimir_api_2" - name: "mimir_api_2"
rules: rules:
- expr: "histogram_quantile(0.99, sum(rate(cortex_request_duration_seconds_bucket[1m])) - expr: "histogram_quantile(0.99, sum(rate(cortex_request_duration_seconds_bucket[5m]))
by (le, cluster, job, route))" by (le, cluster, job, route))"
record: "cluster_job_route:cortex_request_duration_seconds:99quantile" record: "cluster_job_route:cortex_request_duration_seconds:99quantile"
- expr: "histogram_quantile(0.50, sum(rate(cortex_request_duration_seconds_bucket[1m])) - expr: "histogram_quantile(0.50, sum(rate(cortex_request_duration_seconds_bucket[5m]))
by (le, cluster, job, route))" by (le, cluster, job, route))"
record: "cluster_job_route:cortex_request_duration_seconds:50quantile" record: "cluster_job_route:cortex_request_duration_seconds:50quantile"
- expr: "sum(rate(cortex_request_duration_seconds_sum[1m])) by (cluster, job, route) - expr: "sum(rate(cortex_request_duration_seconds_sum[5m])) by (cluster, job, route)
/ sum(rate(cortex_request_duration_seconds_count[1m])) by (cluster, job, route)" / sum(rate(cortex_request_duration_seconds_count[5m])) by (cluster, job, route)"
record: "cluster_job_route:cortex_request_duration_seconds:avg" record: "cluster_job_route:cortex_request_duration_seconds:avg"
- expr: "sum(rate(cortex_request_duration_seconds_bucket[1m])) by (le, cluster, job, - expr: "sum(rate(cortex_request_duration_seconds_bucket[5m])) by (le, cluster, job,
route)" route)"
record: "cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate" record: "cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate"
- expr: "sum(rate(cortex_request_duration_seconds_sum[1m])) by (cluster, job, route)" - expr: "sum(rate(cortex_request_duration_seconds_sum[5m])) by (cluster, job, route)"
record: "cluster_job_route:cortex_request_duration_seconds_sum:sum_rate" record: "cluster_job_route:cortex_request_duration_seconds_sum:sum_rate"
- expr: "sum(rate(cortex_request_duration_seconds_count[1m])) by (cluster, job, route)" - expr: "sum(rate(cortex_request_duration_seconds_count[5m])) by (cluster, job, route)"
record: "cluster_job_route:cortex_request_duration_seconds_count:sum_rate" record: "cluster_job_route:cortex_request_duration_seconds_count:sum_rate"
- name: "mimir_api_3" - name: "mimir_api_3"
rules: rules:
- expr: "histogram_quantile(0.99, sum(rate(cortex_request_duration_seconds_bucket[1m])) - expr: "histogram_quantile(0.99, sum(rate(cortex_request_duration_seconds_bucket[5m]))
by (le, cluster, namespace, job, route))" by (le, cluster, namespace, job, route))"
record: "cluster_namespace_job_route:cortex_request_duration_seconds:99quantile" record: "cluster_namespace_job_route:cortex_request_duration_seconds:99quantile"
- expr: "histogram_quantile(0.50, sum(rate(cortex_request_duration_seconds_bucket[1m])) - expr: "histogram_quantile(0.50, sum(rate(cortex_request_duration_seconds_bucket[5m]))
by (le, cluster, namespace, job, route))" by (le, cluster, namespace, job, route))"
record: "cluster_namespace_job_route:cortex_request_duration_seconds:50quantile" record: "cluster_namespace_job_route:cortex_request_duration_seconds:50quantile"
- expr: "sum(rate(cortex_request_duration_seconds_sum[1m])) by (cluster, namespace, - expr: "sum(rate(cortex_request_duration_seconds_sum[5m])) by (cluster, namespace,
job, route) / sum(rate(cortex_request_duration_seconds_count[1m])) by (cluster, job, route) / sum(rate(cortex_request_duration_seconds_count[5m])) by (cluster,
namespace, job, route)" namespace, job, route)"
record: "cluster_namespace_job_route:cortex_request_duration_seconds:avg" record: "cluster_namespace_job_route:cortex_request_duration_seconds:avg"
- expr: "sum(rate(cortex_request_duration_seconds_bucket[1m])) by (le, cluster, namespace, - expr: "sum(rate(cortex_request_duration_seconds_bucket[5m])) by (le, cluster, namespace,
job, route)" job, route)"
record: "cluster_namespace_job_route:cortex_request_duration_seconds_bucket:sum_rate" record: "cluster_namespace_job_route:cortex_request_duration_seconds_bucket:sum_rate"
- expr: "sum(rate(cortex_request_duration_seconds_sum[1m])) by (cluster, namespace, - expr: "sum(rate(cortex_request_duration_seconds_sum[5m])) by (cluster, namespace,
job, route)" job, route)"
record: "cluster_namespace_job_route:cortex_request_duration_seconds_sum:sum_rate" record: "cluster_namespace_job_route:cortex_request_duration_seconds_sum:sum_rate"
- expr: "sum(rate(cortex_request_duration_seconds_count[1m])) by (cluster, namespace, - expr: "sum(rate(cortex_request_duration_seconds_count[5m])) by (cluster, namespace,
job, route)" job, route)"
record: "cluster_namespace_job_route:cortex_request_duration_seconds_count:sum_rate" record: "cluster_namespace_job_route:cortex_request_duration_seconds_count:sum_rate"
- name: "mimir_querier_api" - name: "mimir_querier_api"
rules: rules:
- expr: "histogram_quantile(0.99, sum(rate(cortex_querier_request_duration_seconds_bucket[1m])) - expr: "histogram_quantile(0.99, sum(rate(cortex_querier_request_duration_seconds_bucket[5m]))
by (le, cluster, job))" by (le, cluster, job))"
record: "cluster_job:cortex_querier_request_duration_seconds:99quantile" record: "cluster_job:cortex_querier_request_duration_seconds:99quantile"
- expr: "histogram_quantile(0.50, sum(rate(cortex_querier_request_duration_seconds_bucket[1m])) - expr: "histogram_quantile(0.50, sum(rate(cortex_querier_request_duration_seconds_bucket[5m]))
by (le, cluster, job))" by (le, cluster, job))"
record: "cluster_job:cortex_querier_request_duration_seconds:50quantile" record: "cluster_job:cortex_querier_request_duration_seconds:50quantile"
- expr: "sum(rate(cortex_querier_request_duration_seconds_sum[1m])) by (cluster, - expr: "sum(rate(cortex_querier_request_duration_seconds_sum[5m])) by (cluster,
job) / sum(rate(cortex_querier_request_duration_seconds_count[1m])) by (cluster, job) / sum(rate(cortex_querier_request_duration_seconds_count[5m])) by (cluster,
job)" job)"
record: "cluster_job:cortex_querier_request_duration_seconds:avg" record: "cluster_job:cortex_querier_request_duration_seconds:avg"
- expr: "sum(rate(cortex_querier_request_duration_seconds_bucket[1m])) by (le, cluster, - expr: "sum(rate(cortex_querier_request_duration_seconds_bucket[5m])) by (le, cluster,
job)" job)"
record: "cluster_job:cortex_querier_request_duration_seconds_bucket:sum_rate" record: "cluster_job:cortex_querier_request_duration_seconds_bucket:sum_rate"
- expr: "sum(rate(cortex_querier_request_duration_seconds_sum[1m])) by (cluster, - expr: "sum(rate(cortex_querier_request_duration_seconds_sum[5m])) by (cluster,
job)" job)"
record: "cluster_job:cortex_querier_request_duration_seconds_sum:sum_rate" record: "cluster_job:cortex_querier_request_duration_seconds_sum:sum_rate"
- expr: "sum(rate(cortex_querier_request_duration_seconds_count[1m])) by (cluster, - expr: "sum(rate(cortex_querier_request_duration_seconds_count[5m])) by (cluster,
job)" job)"
record: "cluster_job:cortex_querier_request_duration_seconds_count:sum_rate" record: "cluster_job:cortex_querier_request_duration_seconds_count:sum_rate"
- expr: "histogram_quantile(0.99, sum(rate(cortex_querier_request_duration_seconds_bucket[1m])) - expr: "histogram_quantile(0.99, sum(rate(cortex_querier_request_duration_seconds_bucket[5m]))
by (le, cluster, job, route))" by (le, cluster, job, route))"
record: "cluster_job_route:cortex_querier_request_duration_seconds:99quantile" record: "cluster_job_route:cortex_querier_request_duration_seconds:99quantile"
- expr: "histogram_quantile(0.50, sum(rate(cortex_querier_request_duration_seconds_bucket[1m])) - expr: "histogram_quantile(0.50, sum(rate(cortex_querier_request_duration_seconds_bucket[5m]))
by (le, cluster, job, route))" by (le, cluster, job, route))"
record: "cluster_job_route:cortex_querier_request_duration_seconds:50quantile" record: "cluster_job_route:cortex_querier_request_duration_seconds:50quantile"
- expr: "sum(rate(cortex_querier_request_duration_seconds_sum[1m])) by (cluster, - expr: "sum(rate(cortex_querier_request_duration_seconds_sum[5m])) by (cluster,
job, route) / sum(rate(cortex_querier_request_duration_seconds_count[1m])) by job, route) / sum(rate(cortex_querier_request_duration_seconds_count[5m])) by
(cluster, job, route)" (cluster, job, route)"
record: "cluster_job_route:cortex_querier_request_duration_seconds:avg" record: "cluster_job_route:cortex_querier_request_duration_seconds:avg"
- expr: "sum(rate(cortex_querier_request_duration_seconds_bucket[1m])) by (le, cluster, - expr: "sum(rate(cortex_querier_request_duration_seconds_bucket[5m])) by (le, cluster,
job, route)" job, route)"
record: "cluster_job_route:cortex_querier_request_duration_seconds_bucket:sum_rate" record: "cluster_job_route:cortex_querier_request_duration_seconds_bucket:sum_rate"
- expr: "sum(rate(cortex_querier_request_duration_seconds_sum[1m])) by (cluster, - expr: "sum(rate(cortex_querier_request_duration_seconds_sum[5m])) by (cluster,
job, route)" job, route)"
record: "cluster_job_route:cortex_querier_request_duration_seconds_sum:sum_rate" record: "cluster_job_route:cortex_querier_request_duration_seconds_sum:sum_rate"
- expr: "sum(rate(cortex_querier_request_duration_seconds_count[1m])) by (cluster, - expr: "sum(rate(cortex_querier_request_duration_seconds_count[5m])) by (cluster,
job, route)" job, route)"
record: "cluster_job_route:cortex_querier_request_duration_seconds_count:sum_rate" record: "cluster_job_route:cortex_querier_request_duration_seconds_count:sum_rate"
- expr: "histogram_quantile(0.99, sum(rate(cortex_querier_request_duration_seconds_bucket[1m])) - expr: "histogram_quantile(0.99, sum(rate(cortex_querier_request_duration_seconds_bucket[5m]))
by (le, cluster, namespace, job, route))" by (le, cluster, namespace, job, route))"
record: "cluster_namespace_job_route:cortex_querier_request_duration_seconds:99quantile" record: "cluster_namespace_job_route:cortex_querier_request_duration_seconds:99quantile"
- expr: "histogram_quantile(0.50, sum(rate(cortex_querier_request_duration_seconds_bucket[1m])) - expr: "histogram_quantile(0.50, sum(rate(cortex_querier_request_duration_seconds_bucket[5m]))
by (le, cluster, namespace, job, route))" by (le, cluster, namespace, job, route))"
record: "cluster_namespace_job_route:cortex_querier_request_duration_seconds:50quantile" record: "cluster_namespace_job_route:cortex_querier_request_duration_seconds:50quantile"
- expr: "sum(rate(cortex_querier_request_duration_seconds_sum[1m])) by (cluster, - expr: "sum(rate(cortex_querier_request_duration_seconds_sum[5m])) by (cluster,
namespace, job, route) / sum(rate(cortex_querier_request_duration_seconds_count[1m])) namespace, job, route) / sum(rate(cortex_querier_request_duration_seconds_count[5m]))
by (cluster, namespace, job, route)" by (cluster, namespace, job, route)"
record: "cluster_namespace_job_route:cortex_querier_request_duration_seconds:avg" record: "cluster_namespace_job_route:cortex_querier_request_duration_seconds:avg"
- expr: "sum(rate(cortex_querier_request_duration_seconds_bucket[1m])) by (le, cluster, - expr: "sum(rate(cortex_querier_request_duration_seconds_bucket[5m])) by (le, cluster,
namespace, job, route)" namespace, job, route)"
record: "cluster_namespace_job_route:cortex_querier_request_duration_seconds_bucket:sum_rate" record: "cluster_namespace_job_route:cortex_querier_request_duration_seconds_bucket:sum_rate"
- expr: "sum(rate(cortex_querier_request_duration_seconds_sum[1m])) by (cluster, - expr: "sum(rate(cortex_querier_request_duration_seconds_sum[5m])) by (cluster,
namespace, job, route)" namespace, job, route)"
record: "cluster_namespace_job_route:cortex_querier_request_duration_seconds_sum:sum_rate" record: "cluster_namespace_job_route:cortex_querier_request_duration_seconds_sum:sum_rate"
- expr: "sum(rate(cortex_querier_request_duration_seconds_count[1m])) by (cluster, - expr: "sum(rate(cortex_querier_request_duration_seconds_count[5m])) by (cluster,
namespace, job, route)" namespace, job, route)"
record: "cluster_namespace_job_route:cortex_querier_request_duration_seconds_count:sum_rate" record: "cluster_namespace_job_route:cortex_querier_request_duration_seconds_count:sum_rate"
- name: "mimir_cache" - name: "mimir_cache"
rules: rules:
- expr: "histogram_quantile(0.99, sum(rate(cortex_memcache_request_duration_seconds_bucket[1m])) - expr: "histogram_quantile(0.99, sum(rate(cortex_memcache_request_duration_seconds_bucket[5m]))
by (le, cluster, job, method))" by (le, cluster, job, method))"
record: "cluster_job_method:cortex_memcache_request_duration_seconds:99quantile" record: "cluster_job_method:cortex_memcache_request_duration_seconds:99quantile"
- expr: "histogram_quantile(0.50, sum(rate(cortex_memcache_request_duration_seconds_bucket[1m])) - expr: "histogram_quantile(0.50, sum(rate(cortex_memcache_request_duration_seconds_bucket[5m]))
by (le, cluster, job, method))" by (le, cluster, job, method))"
record: "cluster_job_method:cortex_memcache_request_duration_seconds:50quantile" record: "cluster_job_method:cortex_memcache_request_duration_seconds:50quantile"
- expr: "sum(rate(cortex_memcache_request_duration_seconds_sum[1m])) by (cluster, - expr: "sum(rate(cortex_memcache_request_duration_seconds_sum[5m])) by (cluster,
job, method) / sum(rate(cortex_memcache_request_duration_seconds_count[1m])) job, method) / sum(rate(cortex_memcache_request_duration_seconds_count[5m]))
by (cluster, job, method)" by (cluster, job, method)"
record: "cluster_job_method:cortex_memcache_request_duration_seconds:avg" record: "cluster_job_method:cortex_memcache_request_duration_seconds:avg"
- expr: "sum(rate(cortex_memcache_request_duration_seconds_bucket[1m])) by (le, cluster, - expr: "sum(rate(cortex_memcache_request_duration_seconds_bucket[5m])) by (le, cluster,
job, method)" job, method)"
record: "cluster_job_method:cortex_memcache_request_duration_seconds_bucket:sum_rate" record: "cluster_job_method:cortex_memcache_request_duration_seconds_bucket:sum_rate"
- expr: "sum(rate(cortex_memcache_request_duration_seconds_sum[1m])) by (cluster, - expr: "sum(rate(cortex_memcache_request_duration_seconds_sum[5m])) by (cluster,
job, method)" job, method)"
record: "cluster_job_method:cortex_memcache_request_duration_seconds_sum:sum_rate" record: "cluster_job_method:cortex_memcache_request_duration_seconds_sum:sum_rate"
- expr: "sum(rate(cortex_memcache_request_duration_seconds_count[1m])) by (cluster, - expr: "sum(rate(cortex_memcache_request_duration_seconds_count[5m])) by (cluster,
job, method)" job, method)"
record: "cluster_job_method:cortex_memcache_request_duration_seconds_count:sum_rate" record: "cluster_job_method:cortex_memcache_request_duration_seconds_count:sum_rate"
- expr: "histogram_quantile(0.99, sum(rate(cortex_cache_request_duration_seconds_bucket[1m])) - expr: "histogram_quantile(0.99, sum(rate(cortex_cache_request_duration_seconds_bucket[5m]))
by (le, cluster, job))" by (le, cluster, job))"
record: "cluster_job:cortex_cache_request_duration_seconds:99quantile" record: "cluster_job:cortex_cache_request_duration_seconds:99quantile"
- expr: "histogram_quantile(0.50, sum(rate(cortex_cache_request_duration_seconds_bucket[1m])) - expr: "histogram_quantile(0.50, sum(rate(cortex_cache_request_duration_seconds_bucket[5m]))
by (le, cluster, job))" by (le, cluster, job))"
record: "cluster_job:cortex_cache_request_duration_seconds:50quantile" record: "cluster_job:cortex_cache_request_duration_seconds:50quantile"
- expr: "sum(rate(cortex_cache_request_duration_seconds_sum[1m])) by (cluster, job) - expr: "sum(rate(cortex_cache_request_duration_seconds_sum[5m])) by (cluster, job)
/ sum(rate(cortex_cache_request_duration_seconds_count[1m])) by (cluster, job)" / sum(rate(cortex_cache_request_duration_seconds_count[5m])) by (cluster, job)"
record: "cluster_job:cortex_cache_request_duration_seconds:avg" record: "cluster_job:cortex_cache_request_duration_seconds:avg"
- expr: "sum(rate(cortex_cache_request_duration_seconds_bucket[1m])) by (le, cluster, - expr: "sum(rate(cortex_cache_request_duration_seconds_bucket[5m])) by (le, cluster,
job)" job)"
record: "cluster_job:cortex_cache_request_duration_seconds_bucket:sum_rate" record: "cluster_job:cortex_cache_request_duration_seconds_bucket:sum_rate"
- expr: "sum(rate(cortex_cache_request_duration_seconds_sum[1m])) by (cluster, job)" - expr: "sum(rate(cortex_cache_request_duration_seconds_sum[5m])) by (cluster, job)"
record: "cluster_job:cortex_cache_request_duration_seconds_sum:sum_rate" record: "cluster_job:cortex_cache_request_duration_seconds_sum:sum_rate"
- expr: "sum(rate(cortex_cache_request_duration_seconds_count[1m])) by (cluster, - expr: "sum(rate(cortex_cache_request_duration_seconds_count[5m])) by (cluster,
job)" job)"
record: "cluster_job:cortex_cache_request_duration_seconds_count:sum_rate" record: "cluster_job:cortex_cache_request_duration_seconds_count:sum_rate"
- expr: "histogram_quantile(0.99, sum(rate(cortex_cache_request_duration_seconds_bucket[1m])) - expr: "histogram_quantile(0.99, sum(rate(cortex_cache_request_duration_seconds_bucket[5m]))
by (le, cluster, job, method))" by (le, cluster, job, method))"
record: "cluster_job_method:cortex_cache_request_duration_seconds:99quantile" record: "cluster_job_method:cortex_cache_request_duration_seconds:99quantile"
- expr: "histogram_quantile(0.50, sum(rate(cortex_cache_request_duration_seconds_bucket[1m])) - expr: "histogram_quantile(0.50, sum(rate(cortex_cache_request_duration_seconds_bucket[5m]))
by (le, cluster, job, method))" by (le, cluster, job, method))"
record: "cluster_job_method:cortex_cache_request_duration_seconds:50quantile" record: "cluster_job_method:cortex_cache_request_duration_seconds:50quantile"
- expr: "sum(rate(cortex_cache_request_duration_seconds_sum[1m])) by (cluster, job, - expr: "sum(rate(cortex_cache_request_duration_seconds_sum[5m])) by (cluster, job,
method) / sum(rate(cortex_cache_request_duration_seconds_count[1m])) by (cluster, method) / sum(rate(cortex_cache_request_duration_seconds_count[5m])) by (cluster,
job, method)" job, method)"
record: "cluster_job_method:cortex_cache_request_duration_seconds:avg" record: "cluster_job_method:cortex_cache_request_duration_seconds:avg"
- expr: "sum(rate(cortex_cache_request_duration_seconds_bucket[1m])) by (le, cluster, - expr: "sum(rate(cortex_cache_request_duration_seconds_bucket[5m])) by (le, cluster,
job, method)" job, method)"
record: "cluster_job_method:cortex_cache_request_duration_seconds_bucket:sum_rate" record: "cluster_job_method:cortex_cache_request_duration_seconds_bucket:sum_rate"
- expr: "sum(rate(cortex_cache_request_duration_seconds_sum[1m])) by (cluster, job, - expr: "sum(rate(cortex_cache_request_duration_seconds_sum[5m])) by (cluster, job,
method)" method)"
record: "cluster_job_method:cortex_cache_request_duration_seconds_sum:sum_rate" record: "cluster_job_method:cortex_cache_request_duration_seconds_sum:sum_rate"
- expr: "sum(rate(cortex_cache_request_duration_seconds_count[1m])) by (cluster, - expr: "sum(rate(cortex_cache_request_duration_seconds_count[5m])) by (cluster,
job, method)" job, method)"
record: "cluster_job_method:cortex_cache_request_duration_seconds_count:sum_rate" record: "cluster_job_method:cortex_cache_request_duration_seconds_count:sum_rate"
- name: "mimir_storage" - name: "mimir_storage"
rules: rules:
- expr: "histogram_quantile(0.99, sum(rate(cortex_kv_request_duration_seconds_bucket[1m])) - expr: "histogram_quantile(0.99, sum(rate(cortex_kv_request_duration_seconds_bucket[5m]))
by (le, cluster, job))" by (le, cluster, job))"
record: "cluster_job:cortex_kv_request_duration_seconds:99quantile" record: "cluster_job:cortex_kv_request_duration_seconds:99quantile"
- expr: "histogram_quantile(0.50, sum(rate(cortex_kv_request_duration_seconds_bucket[1m])) - expr: "histogram_quantile(0.50, sum(rate(cortex_kv_request_duration_seconds_bucket[5m]))
by (le, cluster, job))" by (le, cluster, job))"
record: "cluster_job:cortex_kv_request_duration_seconds:50quantile" record: "cluster_job:cortex_kv_request_duration_seconds:50quantile"
- expr: "sum(rate(cortex_kv_request_duration_seconds_sum[1m])) by (cluster, job) - expr: "sum(rate(cortex_kv_request_duration_seconds_sum[5m])) by (cluster, job)
/ sum(rate(cortex_kv_request_duration_seconds_count[1m])) by (cluster, job)" / sum(rate(cortex_kv_request_duration_seconds_count[5m])) by (cluster, job)"
record: "cluster_job:cortex_kv_request_duration_seconds:avg" record: "cluster_job:cortex_kv_request_duration_seconds:avg"
- expr: "sum(rate(cortex_kv_request_duration_seconds_bucket[1m])) by (le, cluster, - expr: "sum(rate(cortex_kv_request_duration_seconds_bucket[5m])) by (le, cluster,
job)" job)"
record: "cluster_job:cortex_kv_request_duration_seconds_bucket:sum_rate" record: "cluster_job:cortex_kv_request_duration_seconds_bucket:sum_rate"
- expr: "sum(rate(cortex_kv_request_duration_seconds_sum[1m])) by (cluster, job)" - expr: "sum(rate(cortex_kv_request_duration_seconds_sum[5m])) by (cluster, job)"
record: "cluster_job:cortex_kv_request_duration_seconds_sum:sum_rate" record: "cluster_job:cortex_kv_request_duration_seconds_sum:sum_rate"
- expr: "sum(rate(cortex_kv_request_duration_seconds_count[1m])) by (cluster, job)" - expr: "sum(rate(cortex_kv_request_duration_seconds_count[5m])) by (cluster, job)"
record: "cluster_job:cortex_kv_request_duration_seconds_count:sum_rate" record: "cluster_job:cortex_kv_request_duration_seconds_count:sum_rate"
  # Recording rules for the query-frontend histograms
  # cortex_query_frontend_retries and
  # cortex_query_frontend_queue_duration_seconds.
  # For each histogram, aggregated by (cluster, job), the group records:
  #   - the 0.99 and 0.50 quantiles (histogram_quantile over the _bucket rate),
  #   - the average (rate of _sum divided by rate of _count),
  #   - the summed rates of the _bucket, _sum and _count series.
  - name: "mimir_queries" - name: "mimir_queries"
    rules: rules:
      - expr: "histogram_quantile(0.99, sum(rate(cortex_query_frontend_retries_bucket[1m])) - expr: "histogram_quantile(0.99, sum(rate(cortex_query_frontend_retries_bucket[5m]))
        by (le, cluster, job))" by (le, cluster, job))"
        record: "cluster_job:cortex_query_frontend_retries:99quantile" record: "cluster_job:cortex_query_frontend_retries:99quantile"
      - expr: "histogram_quantile(0.50, sum(rate(cortex_query_frontend_retries_bucket[1m])) - expr: "histogram_quantile(0.50, sum(rate(cortex_query_frontend_retries_bucket[5m]))
        by (le, cluster, job))" by (le, cluster, job))"
        record: "cluster_job:cortex_query_frontend_retries:50quantile" record: "cluster_job:cortex_query_frontend_retries:50quantile"
      - expr: "sum(rate(cortex_query_frontend_retries_sum[1m])) by (cluster, job) / sum(rate(cortex_query_frontend_retries_count[1m])) - expr: "sum(rate(cortex_query_frontend_retries_sum[5m])) by (cluster, job) / sum(rate(cortex_query_frontend_retries_count[5m]))
        by (cluster, job)" by (cluster, job)"
        record: "cluster_job:cortex_query_frontend_retries:avg" record: "cluster_job:cortex_query_frontend_retries:avg"
      - expr: "sum(rate(cortex_query_frontend_retries_bucket[1m])) by (le, cluster, job)" - expr: "sum(rate(cortex_query_frontend_retries_bucket[5m])) by (le, cluster, job)"
        record: "cluster_job:cortex_query_frontend_retries_bucket:sum_rate" record: "cluster_job:cortex_query_frontend_retries_bucket:sum_rate"
      - expr: "sum(rate(cortex_query_frontend_retries_sum[1m])) by (cluster, job)" - expr: "sum(rate(cortex_query_frontend_retries_sum[5m])) by (cluster, job)"
        record: "cluster_job:cortex_query_frontend_retries_sum:sum_rate" record: "cluster_job:cortex_query_frontend_retries_sum:sum_rate"
      - expr: "sum(rate(cortex_query_frontend_retries_count[1m])) by (cluster, job)" - expr: "sum(rate(cortex_query_frontend_retries_count[5m])) by (cluster, job)"
        record: "cluster_job:cortex_query_frontend_retries_count:sum_rate" record: "cluster_job:cortex_query_frontend_retries_count:sum_rate"
      - expr: "histogram_quantile(0.99, sum(rate(cortex_query_frontend_queue_duration_seconds_bucket[1m])) - expr: "histogram_quantile(0.99, sum(rate(cortex_query_frontend_queue_duration_seconds_bucket[5m]))
        by (le, cluster, job))" by (le, cluster, job))"
        record: "cluster_job:cortex_query_frontend_queue_duration_seconds:99quantile" record: "cluster_job:cortex_query_frontend_queue_duration_seconds:99quantile"
      - expr: "histogram_quantile(0.50, sum(rate(cortex_query_frontend_queue_duration_seconds_bucket[1m])) - expr: "histogram_quantile(0.50, sum(rate(cortex_query_frontend_queue_duration_seconds_bucket[5m]))
        by (le, cluster, job))" by (le, cluster, job))"
        record: "cluster_job:cortex_query_frontend_queue_duration_seconds:50quantile" record: "cluster_job:cortex_query_frontend_queue_duration_seconds:50quantile"
      - expr: "sum(rate(cortex_query_frontend_queue_duration_seconds_sum[1m])) by (cluster, - expr: "sum(rate(cortex_query_frontend_queue_duration_seconds_sum[5m])) by (cluster,
        job) / sum(rate(cortex_query_frontend_queue_duration_seconds_count[1m])) by job) / sum(rate(cortex_query_frontend_queue_duration_seconds_count[5m])) by
        (cluster, job)" (cluster, job)"
        record: "cluster_job:cortex_query_frontend_queue_duration_seconds:avg" record: "cluster_job:cortex_query_frontend_queue_duration_seconds:avg"
      - expr: "sum(rate(cortex_query_frontend_queue_duration_seconds_bucket[1m])) by (le, - expr: "sum(rate(cortex_query_frontend_queue_duration_seconds_bucket[5m])) by (le,
        cluster, job)" cluster, job)"
        record: "cluster_job:cortex_query_frontend_queue_duration_seconds_bucket:sum_rate" record: "cluster_job:cortex_query_frontend_queue_duration_seconds_bucket:sum_rate"
      - expr: "sum(rate(cortex_query_frontend_queue_duration_seconds_sum[1m])) by (cluster, - expr: "sum(rate(cortex_query_frontend_queue_duration_seconds_sum[5m])) by (cluster,
        job)" job)"
        record: "cluster_job:cortex_query_frontend_queue_duration_seconds_sum:sum_rate" record: "cluster_job:cortex_query_frontend_queue_duration_seconds_sum:sum_rate"
      - expr: "sum(rate(cortex_query_frontend_queue_duration_seconds_count[1m])) by (cluster, - expr: "sum(rate(cortex_query_frontend_queue_duration_seconds_count[5m])) by (cluster,
        job)" job)"
        record: "cluster_job:cortex_query_frontend_queue_duration_seconds_count:sum_rate" record: "cluster_job:cortex_query_frontend_queue_duration_seconds_count:sum_rate"
  # Recording rules for three ingester histograms:
  # cortex_ingester_queried_series, cortex_ingester_queried_samples and
  # cortex_ingester_queried_exemplars.
  # For each histogram, aggregated by (cluster, job), the group records:
  #   - the 0.99 and 0.50 quantiles (histogram_quantile over the _bucket rate),
  #   - the average (rate of _sum divided by rate of _count),
  #   - the summed rates of the _bucket, _sum and _count series.
  - name: "mimir_ingester_queries" - name: "mimir_ingester_queries"
    rules: rules:
      - expr: "histogram_quantile(0.99, sum(rate(cortex_ingester_queried_series_bucket[1m])) - expr: "histogram_quantile(0.99, sum(rate(cortex_ingester_queried_series_bucket[5m]))
        by (le, cluster, job))" by (le, cluster, job))"
        record: "cluster_job:cortex_ingester_queried_series:99quantile" record: "cluster_job:cortex_ingester_queried_series:99quantile"
      - expr: "histogram_quantile(0.50, sum(rate(cortex_ingester_queried_series_bucket[1m])) - expr: "histogram_quantile(0.50, sum(rate(cortex_ingester_queried_series_bucket[5m]))
        by (le, cluster, job))" by (le, cluster, job))"
        record: "cluster_job:cortex_ingester_queried_series:50quantile" record: "cluster_job:cortex_ingester_queried_series:50quantile"
      - expr: "sum(rate(cortex_ingester_queried_series_sum[1m])) by (cluster, job) / sum(rate(cortex_ingester_queried_series_count[1m])) - expr: "sum(rate(cortex_ingester_queried_series_sum[5m])) by (cluster, job) / sum(rate(cortex_ingester_queried_series_count[5m]))
        by (cluster, job)" by (cluster, job)"
        record: "cluster_job:cortex_ingester_queried_series:avg" record: "cluster_job:cortex_ingester_queried_series:avg"
      - expr: "sum(rate(cortex_ingester_queried_series_bucket[1m])) by (le, cluster, job)" - expr: "sum(rate(cortex_ingester_queried_series_bucket[5m])) by (le, cluster, job)"
        record: "cluster_job:cortex_ingester_queried_series_bucket:sum_rate" record: "cluster_job:cortex_ingester_queried_series_bucket:sum_rate"
      - expr: "sum(rate(cortex_ingester_queried_series_sum[1m])) by (cluster, job)" - expr: "sum(rate(cortex_ingester_queried_series_sum[5m])) by (cluster, job)"
        record: "cluster_job:cortex_ingester_queried_series_sum:sum_rate" record: "cluster_job:cortex_ingester_queried_series_sum:sum_rate"
      - expr: "sum(rate(cortex_ingester_queried_series_count[1m])) by (cluster, job)" - expr: "sum(rate(cortex_ingester_queried_series_count[5m])) by (cluster, job)"
        record: "cluster_job:cortex_ingester_queried_series_count:sum_rate" record: "cluster_job:cortex_ingester_queried_series_count:sum_rate"
      - expr: "histogram_quantile(0.99, sum(rate(cortex_ingester_queried_samples_bucket[1m])) - expr: "histogram_quantile(0.99, sum(rate(cortex_ingester_queried_samples_bucket[5m]))
        by (le, cluster, job))" by (le, cluster, job))"
        record: "cluster_job:cortex_ingester_queried_samples:99quantile" record: "cluster_job:cortex_ingester_queried_samples:99quantile"
      - expr: "histogram_quantile(0.50, sum(rate(cortex_ingester_queried_samples_bucket[1m])) - expr: "histogram_quantile(0.50, sum(rate(cortex_ingester_queried_samples_bucket[5m]))
        by (le, cluster, job))" by (le, cluster, job))"
        record: "cluster_job:cortex_ingester_queried_samples:50quantile" record: "cluster_job:cortex_ingester_queried_samples:50quantile"
      - expr: "sum(rate(cortex_ingester_queried_samples_sum[1m])) by (cluster, job) / sum(rate(cortex_ingester_queried_samples_count[1m])) - expr: "sum(rate(cortex_ingester_queried_samples_sum[5m])) by (cluster, job) / sum(rate(cortex_ingester_queried_samples_count[5m]))
        by (cluster, job)" by (cluster, job)"
        record: "cluster_job:cortex_ingester_queried_samples:avg" record: "cluster_job:cortex_ingester_queried_samples:avg"
      - expr: "sum(rate(cortex_ingester_queried_samples_bucket[1m])) by (le, cluster, job)" - expr: "sum(rate(cortex_ingester_queried_samples_bucket[5m])) by (le, cluster, job)"
        record: "cluster_job:cortex_ingester_queried_samples_bucket:sum_rate" record: "cluster_job:cortex_ingester_queried_samples_bucket:sum_rate"
      - expr: "sum(rate(cortex_ingester_queried_samples_sum[1m])) by (cluster, job)" - expr: "sum(rate(cortex_ingester_queried_samples_sum[5m])) by (cluster, job)"
        record: "cluster_job:cortex_ingester_queried_samples_sum:sum_rate" record: "cluster_job:cortex_ingester_queried_samples_sum:sum_rate"
      - expr: "sum(rate(cortex_ingester_queried_samples_count[1m])) by (cluster, job)" - expr: "sum(rate(cortex_ingester_queried_samples_count[5m])) by (cluster, job)"
        record: "cluster_job:cortex_ingester_queried_samples_count:sum_rate" record: "cluster_job:cortex_ingester_queried_samples_count:sum_rate"
      - expr: "histogram_quantile(0.99, sum(rate(cortex_ingester_queried_exemplars_bucket[1m])) - expr: "histogram_quantile(0.99, sum(rate(cortex_ingester_queried_exemplars_bucket[5m]))
        by (le, cluster, job))" by (le, cluster, job))"
        record: "cluster_job:cortex_ingester_queried_exemplars:99quantile" record: "cluster_job:cortex_ingester_queried_exemplars:99quantile"
      - expr: "histogram_quantile(0.50, sum(rate(cortex_ingester_queried_exemplars_bucket[1m])) - expr: "histogram_quantile(0.50, sum(rate(cortex_ingester_queried_exemplars_bucket[5m]))
        by (le, cluster, job))" by (le, cluster, job))"
        record: "cluster_job:cortex_ingester_queried_exemplars:50quantile" record: "cluster_job:cortex_ingester_queried_exemplars:50quantile"
      - expr: "sum(rate(cortex_ingester_queried_exemplars_sum[1m])) by (cluster, job) / - expr: "sum(rate(cortex_ingester_queried_exemplars_sum[5m])) by (cluster, job) /
        sum(rate(cortex_ingester_queried_exemplars_count[1m])) by (cluster, job)" sum(rate(cortex_ingester_queried_exemplars_count[5m])) by (cluster, job)"
        record: "cluster_job:cortex_ingester_queried_exemplars:avg" record: "cluster_job:cortex_ingester_queried_exemplars:avg"
      - expr: "sum(rate(cortex_ingester_queried_exemplars_bucket[1m])) by (le, cluster, - expr: "sum(rate(cortex_ingester_queried_exemplars_bucket[5m])) by (le, cluster,
        job)" job)"
        record: "cluster_job:cortex_ingester_queried_exemplars_bucket:sum_rate" record: "cluster_job:cortex_ingester_queried_exemplars_bucket:sum_rate"
      - expr: "sum(rate(cortex_ingester_queried_exemplars_sum[1m])) by (cluster, job)" - expr: "sum(rate(cortex_ingester_queried_exemplars_sum[5m])) by (cluster, job)"
        record: "cluster_job:cortex_ingester_queried_exemplars_sum:sum_rate" record: "cluster_job:cortex_ingester_queried_exemplars_sum:sum_rate"
      - expr: "sum(rate(cortex_ingester_queried_exemplars_count[1m])) by (cluster, job)" - expr: "sum(rate(cortex_ingester_queried_exemplars_count[5m])) by (cluster, job)"
        record: "cluster_job:cortex_ingester_queried_exemplars_count:sum_rate" record: "cluster_job:cortex_ingester_queried_exemplars_count:sum_rate"
  # Records the 5m rate of cortex_distributor_received_samples_total,
  # summed by (cluster, namespace, job).
  - name: "mimir_received_samples" - name: "mimir_received_samples"
    rules: rules:
      - expr: "| - expr: "sum by (cluster, namespace, job) (rate(cortex_distributor_received_samples_total[5m]))"
        sum by (cluster, namespace, job) (rate(cortex_distributor_received_samples_total[5m]))"
        record: "cluster_namespace_job:cortex_distributor_received_samples:rate5m" record: "cluster_namespace_job:cortex_distributor_received_samples:rate5m"
  # Records the 5m rate of cortex_distributor_exemplars_in_total,
  # summed by (cluster, namespace, job).
  - name: "mimir_exemplars_in" - name: "mimir_exemplars_in"
    rules: rules:
      - expr: "| - expr: "sum by (cluster, namespace, job) (rate(cortex_distributor_exemplars_in_total[5m]))"
        sum by (cluster, namespace, job) (rate(cortex_distributor_exemplars_in_total[5m]))"
        record: "cluster_namespace_job:cortex_distributor_exemplars_in:rate5m" record: "cluster_namespace_job:cortex_distributor_exemplars_in:rate5m"
  # Records the 5m rate of cortex_distributor_received_exemplars_total,
  # summed by (cluster, namespace, job).
  - name: "mimir_received_exemplars" - name: "mimir_received_exemplars"
    rules: rules:
      - expr: "| - expr: "sum by (cluster, namespace, job) (rate(cortex_distributor_received_exemplars_total[5m]))"
        sum by (cluster, namespace, job) (rate(cortex_distributor_received_exemplars_total[5m]))"
        record: "cluster_namespace_job:cortex_distributor_received_exemplars:rate5m" record: "cluster_namespace_job:cortex_distributor_received_exemplars:rate5m"
  # Records the 5m rate of cortex_ingester_ingested_exemplars_total,
  # summed by (cluster, namespace, job).
  - name: "mimir_exemplars_ingested" - name: "mimir_exemplars_ingested"
    rules: rules:
      - expr: "| - expr: "sum by (cluster, namespace, job) (rate(cortex_ingester_ingested_exemplars_total[5m]))"
        sum by (cluster, namespace, job) (rate(cortex_ingester_ingested_exemplars_total[5m]))"
        record: "cluster_namespace_job:cortex_ingester_ingested_exemplars:rate5m" record: "cluster_namespace_job:cortex_ingester_ingested_exemplars:rate5m"
  # Records the 5m rate of
  # cortex_ingester_tsdb_exemplar_exemplars_appended_total,
  # summed by (cluster, namespace, job).
  - name: "mimir_exemplars_appended" - name: "mimir_exemplars_appended"
    rules: rules:
      - expr: "| - expr: "sum by (cluster, namespace, job) (rate(cortex_ingester_tsdb_exemplar_exemplars_appended_total[5m]))"
        sum by (cluster, namespace, job) (rate(cortex_ingester_tsdb_exemplar_exemplars_appended_total[5m]))"
        record: "cluster_namespace_job:cortex_ingester_tsdb_exemplar_exemplars_appended:rate5m" record: "cluster_namespace_job:cortex_ingester_tsdb_exemplar_exemplars_appended:rate5m"
- name: "mimir_scaling_rules" - name: "mimir_scaling_rules"
rules: rules:
- expr: "| - expr: |
# Convenience rule to get the number of replicas for both a deployment and a statefulset. # Convenience rule to get the number of replicas for both a deployment and a statefulset.
# Multi-zone deployments are grouped together removing the \"zone-X\" suffix. # Multi-zone deployments are grouped together removing the "zone-X" suffix.
sum by (cluster, namespace, deployment) ( sum by (cluster, namespace, deployment) (
label_replace( label_replace(
kube_deployment_spec_replicas, kube_deployment_spec_replicas,
# The question mark in \"(.*?)\" is used to make it non-greedy, otherwise it # The question mark in "(.*?)" is used to make it non-greedy, otherwise it
# always matches everything and the (optional) zone is not removed. # always matches everything and the (optional) zone is not removed.
\"deployment\", \"$1\", \"deployment\", \"(.*?)(?:-zone-[a-z])?\" "deployment", "$1", "deployment", "(.*?)(?:-zone-[a-z])?"
) )
) )
or or
sum by (cluster, namespace, deployment) ( sum by (cluster, namespace, deployment) (
label_replace(kube_statefulset_replicas, \"deployment\", \"$1\", \"statefulset\", \"(.*?)(?:-zone-[a-z])?\") label_replace(kube_statefulset_replicas, "deployment", "$1", "statefulset", "(.*?)(?:-zone-[a-z])?")
)" )
record: "cluster_namespace_deployment:actual_replicas:count" record: "cluster_namespace_deployment:actual_replicas:count"
- expr: "| - expr: |
ceil( ceil(
quantile_over_time(0.99, quantile_over_time(0.99,
sum by (cluster, namespace) ( sum by (cluster, namespace) (
@ -324,21 +319,21 @@ groups:
)[24h:] )[24h:]
) )
/ 240000 / 240000
)" )
labels: labels:
deployment: "distributor" deployment: "distributor"
reason: "sample_rate" reason: "sample_rate"
record: "cluster_namespace_deployment_reason:required_replicas:count" record: "cluster_namespace_deployment_reason:required_replicas:count"
- expr: "| - expr: |
ceil( ceil(
sum by (cluster, namespace) (cortex_limits_overrides{limit_name=\"ingestion_rate\"}) sum by (cluster, namespace) (cortex_limits_overrides{limit_name="ingestion_rate"})
* 0.59999999999999998 / 240000 * 0.59999999999999998 / 240000
)" )
labels: labels:
deployment: "distributor" deployment: "distributor"
reason: "sample_rate_limits" reason: "sample_rate_limits"
record: "cluster_namespace_deployment_reason:required_replicas:count" record: "cluster_namespace_deployment_reason:required_replicas:count"
- expr: "| - expr: |
ceil( ceil(
quantile_over_time(0.99, quantile_over_time(0.99,
sum by (cluster, namespace) ( sum by (cluster, namespace) (
@ -346,12 +341,12 @@ groups:
)[24h:] )[24h:]
) )
* 3 / 80000 * 3 / 80000
)" )
labels: labels:
deployment: "ingester" deployment: "ingester"
reason: "sample_rate" reason: "sample_rate"
record: "cluster_namespace_deployment_reason:required_replicas:count" record: "cluster_namespace_deployment_reason:required_replicas:count"
- expr: "| - expr: |
ceil( ceil(
quantile_over_time(0.99, quantile_over_time(0.99,
sum by(cluster, namespace) ( sum by(cluster, namespace) (
@ -359,59 +354,59 @@ groups:
)[24h:] )[24h:]
) )
/ 1500000 / 1500000
)" )
labels: labels:
deployment: "ingester" deployment: "ingester"
reason: "active_series" reason: "active_series"
record: "cluster_namespace_deployment_reason:required_replicas:count" record: "cluster_namespace_deployment_reason:required_replicas:count"
- expr: "| - expr: |
ceil( ceil(
sum by (cluster, namespace) (cortex_limits_overrides{limit_name=\"max_global_series_per_user\"}) sum by (cluster, namespace) (cortex_limits_overrides{limit_name="max_global_series_per_user"})
* 3 * 0.59999999999999998 / 1500000 * 3 * 0.59999999999999998 / 1500000
)" )
labels: labels:
deployment: "ingester" deployment: "ingester"
reason: "active_series_limits" reason: "active_series_limits"
record: "cluster_namespace_deployment_reason:required_replicas:count" record: "cluster_namespace_deployment_reason:required_replicas:count"
- expr: "| - expr: |
ceil( ceil(
sum by (cluster, namespace) (cortex_limits_overrides{limit_name=\"ingestion_rate\"}) sum by (cluster, namespace) (cortex_limits_overrides{limit_name="ingestion_rate"})
* 0.59999999999999998 / 80000 * 0.59999999999999998 / 80000
)" )
labels: labels:
deployment: "ingester" deployment: "ingester"
reason: "sample_rate_limits" reason: "sample_rate_limits"
record: "cluster_namespace_deployment_reason:required_replicas:count" record: "cluster_namespace_deployment_reason:required_replicas:count"
- expr: "| - expr: |
ceil( ceil(
(sum by (cluster, namespace) ( (sum by (cluster, namespace) (
cortex_ingester_tsdb_storage_blocks_bytes{job=~\".+/ingester.*\"} cortex_ingester_tsdb_storage_blocks_bytes{job=~".+/ingester.*"}
) / 4) ) / 4)
/ /
avg by (cluster, namespace) ( avg by (cluster, namespace) (
memcached_limit_bytes{job=~\".+/memcached\"} memcached_limit_bytes{job=~".+/memcached"}
) )
)" )
labels: labels:
deployment: "memcached" deployment: "memcached"
reason: "active_series" reason: "active_series"
record: "cluster_namespace_deployment_reason:required_replicas:count" record: "cluster_namespace_deployment_reason:required_replicas:count"
- expr: "| - expr: |
sum by (cluster, namespace, deployment) ( sum by (cluster, namespace, deployment) (
label_replace( label_replace(
label_replace( label_replace(
sum by (cluster, namespace, pod)(rate(container_cpu_usage_seconds_total[1m])), sum by (cluster, namespace, pod)(rate(container_cpu_usage_seconds_total[5m])),
\"deployment\", \"$1\", \"pod\", \"(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))\" "deployment", "$1", "pod", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))"
), ),
# The question mark in \"(.*?)\" is used to make it non-greedy, otherwise it # The question mark in "(.*?)" is used to make it non-greedy, otherwise it
# always matches everything and the (optional) zone is not removed. # always matches everything and the (optional) zone is not removed.
\"deployment\", \"$1\", \"deployment\", \"(.*?)(?:-zone-[a-z])?\" "deployment", "$1", "deployment", "(.*?)(?:-zone-[a-z])?"
) )
)" )
record: "cluster_namespace_deployment:container_cpu_usage_seconds_total:sum_rate" record: "cluster_namespace_deployment:container_cpu_usage_seconds_total:sum_rate"
- expr: "| - expr: |
# Convenience rule to get the CPU request for both a deployment and a statefulset. # Convenience rule to get the CPU request for both a deployment and a statefulset.
# Multi-zone deployments are grouped together removing the \"zone-X\" suffix. # Multi-zone deployments are grouped together removing the "zone-X" suffix.
# This recording rule is made compatible with the breaking changes introduced in kube-state-metrics v2 # This recording rule is made compatible with the breaking changes introduced in kube-state-metrics v2
# that remove resource metrics, ref: # that remove resource metrics, ref:
# - https://github.com/kubernetes/kube-state-metrics/blob/master/CHANGELOG.md#v200-alpha--2020-09-16 # - https://github.com/kubernetes/kube-state-metrics/blob/master/CHANGELOG.md#v200-alpha--2020-09-16
@ -424,11 +419,11 @@ groups:
label_replace( label_replace(
label_replace( label_replace(
kube_pod_container_resource_requests_cpu_cores, kube_pod_container_resource_requests_cpu_cores,
\"deployment\", \"$1\", \"pod\", \"(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))\" "deployment", "$1", "pod", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))"
), ),
# The question mark in \"(.*?)\" is used to make it non-greedy, otherwise it # The question mark in "(.*?)" is used to make it non-greedy, otherwise it
# always matches everything and the (optional) zone is not removed. # always matches everything and the (optional) zone is not removed.
\"deployment\", \"$1\", \"deployment\", \"(.*?)(?:-zone-[a-z])?\" "deployment", "$1", "deployment", "(.*?)(?:-zone-[a-z])?"
) )
) )
) )
@ -439,17 +434,17 @@ groups:
sum by (cluster, namespace, deployment) ( sum by (cluster, namespace, deployment) (
label_replace( label_replace(
label_replace( label_replace(
kube_pod_container_resource_requests{resource=\"cpu\"}, kube_pod_container_resource_requests{resource="cpu"},
\"deployment\", \"$1\", \"pod\", \"(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))\" "deployment", "$1", "pod", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))"
), ),
# The question mark in \"(.*?)\" is used to make it non-greedy, otherwise it # The question mark in "(.*?)" is used to make it non-greedy, otherwise it
# always matches everything and the (optional) zone is not removed. # always matches everything and the (optional) zone is not removed.
\"deployment\", \"$1\", \"deployment\", \"(.*?)(?:-zone-[a-z])?\" "deployment", "$1", "deployment", "(.*?)(?:-zone-[a-z])?"
) )
) )
)" )
record: "cluster_namespace_deployment:kube_pod_container_resource_requests_cpu_cores:sum" record: "cluster_namespace_deployment:kube_pod_container_resource_requests_cpu_cores:sum"
- expr: "| - expr: |
# Jobs should be sized to their CPU usage. # Jobs should be sized to their CPU usage.
# We do this by comparing 99th percentile usage over the last 24hrs to # We do this by comparing 99th percentile usage over the last 24hrs to
# their current provisioned #replicas and resource requests. # their current provisioned #replicas and resource requests.
@ -459,28 +454,28 @@ groups:
quantile_over_time(0.99, cluster_namespace_deployment:container_cpu_usage_seconds_total:sum_rate[24h]) quantile_over_time(0.99, cluster_namespace_deployment:container_cpu_usage_seconds_total:sum_rate[24h])
/ /
cluster_namespace_deployment:kube_pod_container_resource_requests_cpu_cores:sum cluster_namespace_deployment:kube_pod_container_resource_requests_cpu_cores:sum
)" )
labels: labels:
reason: "cpu_usage" reason: "cpu_usage"
record: "cluster_namespace_deployment_reason:required_replicas:count" record: "cluster_namespace_deployment_reason:required_replicas:count"
- expr: "| - expr: |
# Convenience rule to get the Memory utilization for both a deployment and a statefulset. # Convenience rule to get the Memory utilization for both a deployment and a statefulset.
# Multi-zone deployments are grouped together removing the \"zone-X\" suffix. # Multi-zone deployments are grouped together removing the "zone-X" suffix.
sum by (cluster, namespace, deployment) ( sum by (cluster, namespace, deployment) (
label_replace( label_replace(
label_replace( label_replace(
container_memory_usage_bytes{image!=\"\"}, container_memory_usage_bytes{image!=""},
\"deployment\", \"$1\", \"pod\", \"(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))\" "deployment", "$1", "pod", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))"
), ),
# The question mark in \"(.*?)\" is used to make it non-greedy, otherwise it # The question mark in "(.*?)" is used to make it non-greedy, otherwise it
# always matches everything and the (optional) zone is not removed. # always matches everything and the (optional) zone is not removed.
\"deployment\", \"$1\", \"deployment\", \"(.*?)(?:-zone-[a-z])?\" "deployment", "$1", "deployment", "(.*?)(?:-zone-[a-z])?"
) )
)" )
record: "cluster_namespace_deployment:container_memory_usage_bytes:sum" record: "cluster_namespace_deployment:container_memory_usage_bytes:sum"
- expr: "| - expr: |
# Convenience rule to get the Memory request for both a deployment and a statefulset. # Convenience rule to get the Memory request for both a deployment and a statefulset.
# Multi-zone deployments are grouped together removing the \"zone-X\" suffix. # Multi-zone deployments are grouped together removing the "zone-X" suffix.
# This recording rule is made compatible with the breaking changes introduced in kube-state-metrics v2 # This recording rule is made compatible with the breaking changes introduced in kube-state-metrics v2
# that remove resource metrics, ref: # that remove resource metrics, ref:
# - https://github.com/kubernetes/kube-state-metrics/blob/master/CHANGELOG.md#v200-alpha--2020-09-16 # - https://github.com/kubernetes/kube-state-metrics/blob/master/CHANGELOG.md#v200-alpha--2020-09-16
@ -493,11 +488,11 @@ groups:
label_replace( label_replace(
label_replace( label_replace(
kube_pod_container_resource_requests_memory_bytes, kube_pod_container_resource_requests_memory_bytes,
\"deployment\", \"$1\", \"pod\", \"(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))\" "deployment", "$1", "pod", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))"
), ),
# The question mark in \"(.*?)\" is used to make it non-greedy, otherwise it # The question mark in "(.*?)" is used to make it non-greedy, otherwise it
# always matches everything and the (optional) zone is not removed. # always matches everything and the (optional) zone is not removed.
\"deployment\", \"$1\", \"deployment\", \"(.*?)(?:-zone-[a-z])?\" "deployment", "$1", "deployment", "(.*?)(?:-zone-[a-z])?"
) )
) )
) )
@ -508,17 +503,17 @@ groups:
sum by (cluster, namespace, deployment) ( sum by (cluster, namespace, deployment) (
label_replace( label_replace(
label_replace( label_replace(
kube_pod_container_resource_requests{resource=\"memory\"}, kube_pod_container_resource_requests{resource="memory"},
\"deployment\", \"$1\", \"pod\", \"(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))\" "deployment", "$1", "pod", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))"
), ),
# The question mark in \"(.*?)\" is used to make it non-greedy, otherwise it # The question mark in "(.*?)" is used to make it non-greedy, otherwise it
# always matches everything and the (optional) zone is not removed. # always matches everything and the (optional) zone is not removed.
\"deployment\", \"$1\", \"deployment\", \"(.*?)(?:-zone-[a-z])?\" "deployment", "$1", "deployment", "(.*?)(?:-zone-[a-z])?"
) )
) )
)" )
record: "cluster_namespace_deployment:kube_pod_container_resource_requests_memory_bytes:sum" record: "cluster_namespace_deployment:kube_pod_container_resource_requests_memory_bytes:sum"
- expr: "| - expr: |
# Jobs should be sized to their Memory usage. # Jobs should be sized to their Memory usage.
# We do this by comparing 99th percentile usage over the last 24hrs to # We do this by comparing 99th percentile usage over the last 24hrs to
# their current provisioned #replicas and resource requests. # their current provisioned #replicas and resource requests.
@ -528,44 +523,33 @@ groups:
quantile_over_time(0.99, cluster_namespace_deployment:container_memory_usage_bytes:sum[24h]) quantile_over_time(0.99, cluster_namespace_deployment:container_memory_usage_bytes:sum[24h])
/ /
cluster_namespace_deployment:kube_pod_container_resource_requests_memory_bytes:sum cluster_namespace_deployment:kube_pod_container_resource_requests_memory_bytes:sum
)" )
labels: labels:
reason: "memory_usage" reason: "memory_usage"
record: "cluster_namespace_deployment_reason:required_replicas:count" record: "cluster_namespace_deployment_reason:required_replicas:count"
  # Alertmanager recording rules:
  #   - cortex_alertmanager_alerts and cortex_alertmanager_silences summed by
  #     (cluster, job, pod);
  #   - 5m rates of the alerts_received / alerts_invalid, state_replication
  #     (total and failed) and partial_state_merges (total and failed) _total
  #     series, summed by (cluster, job);
  #   - 5m rates of the notifications (total and failed) _total series, summed
  #     by (cluster, job, integration).
  - name: "mimir_alertmanager_rules" - name: "mimir_alertmanager_rules"
    rules: rules:
      - expr: "| - expr: "sum by (cluster, job, pod) (cortex_alertmanager_alerts)"
        sum by (cluster, job, pod) (cortex_alertmanager_alerts)"
        record: "cluster_job_pod:cortex_alertmanager_alerts:sum" record: "cluster_job_pod:cortex_alertmanager_alerts:sum"
      - expr: "| - expr: "sum by (cluster, job, pod) (cortex_alertmanager_silences)"
        sum by (cluster, job, pod) (cortex_alertmanager_silences)"
        record: "cluster_job_pod:cortex_alertmanager_silences:sum" record: "cluster_job_pod:cortex_alertmanager_silences:sum"
      - expr: "| - expr: "sum by (cluster, job) (rate(cortex_alertmanager_alerts_received_total[5m]))"
        sum by (cluster, job) (rate(cortex_alertmanager_alerts_received_total[5m]))"
        record: "cluster_job:cortex_alertmanager_alerts_received_total:rate5m" record: "cluster_job:cortex_alertmanager_alerts_received_total:rate5m"
      - expr: "| - expr: "sum by (cluster, job) (rate(cortex_alertmanager_alerts_invalid_total[5m]))"
        sum by (cluster, job) (rate(cortex_alertmanager_alerts_invalid_total[5m]))"
        record: "cluster_job:cortex_alertmanager_alerts_invalid_total:rate5m" record: "cluster_job:cortex_alertmanager_alerts_invalid_total:rate5m"
      - expr: "| - expr: "sum by (cluster, job, integration) (rate(cortex_alertmanager_notifications_total[5m]))"
        sum by (cluster, job, integration) (rate(cortex_alertmanager_notifications_total[5m]))"
        record: "cluster_job_integration:cortex_alertmanager_notifications_total:rate5m" record: "cluster_job_integration:cortex_alertmanager_notifications_total:rate5m"
      - expr: "| - expr: "sum by (cluster, job, integration) (rate(cortex_alertmanager_notifications_failed_total[5m]))"
        sum by (cluster, job, integration) (rate(cortex_alertmanager_notifications_failed_total[5m]))"
        record: "cluster_job_integration:cortex_alertmanager_notifications_failed_total:rate5m" record: "cluster_job_integration:cortex_alertmanager_notifications_failed_total:rate5m"
      - expr: "| - expr: "sum by (cluster, job) (rate(cortex_alertmanager_state_replication_total[5m]))"
        sum by (cluster, job) (rate(cortex_alertmanager_state_replication_total[5m]))"
        record: "cluster_job:cortex_alertmanager_state_replication_total:rate5m" record: "cluster_job:cortex_alertmanager_state_replication_total:rate5m"
      - expr: "| - expr: "sum by (cluster, job) (rate(cortex_alertmanager_state_replication_failed_total[5m]))"
        sum by (cluster, job) (rate(cortex_alertmanager_state_replication_failed_total[5m]))"
        record: "cluster_job:cortex_alertmanager_state_replication_failed_total:rate5m" record: "cluster_job:cortex_alertmanager_state_replication_failed_total:rate5m"
      - expr: "| - expr: "sum by (cluster, job) (rate(cortex_alertmanager_partial_state_merges_total[5m]))"
        sum by (cluster, job) (rate(cortex_alertmanager_partial_state_merges_total[5m]))"
        record: "cluster_job:cortex_alertmanager_partial_state_merges_total:rate5m" record: "cluster_job:cortex_alertmanager_partial_state_merges_total:rate5m"
      - expr: "| - expr: "sum by (cluster, job) (rate(cortex_alertmanager_partial_state_merges_failed_total[5m]))"
        sum by (cluster, job) (rate(cortex_alertmanager_partial_state_merges_failed_total[5m]))"
        record: "cluster_job:cortex_alertmanager_partial_state_merges_failed_total:rate5m" record: "cluster_job:cortex_alertmanager_partial_state_merges_failed_total:rate5m"
  # Records the per-(cluster, namespace, pod) rate of
  # cortex_ingester_ingested_samples_total.
  # NOTE(review): the updated expression uses a [5m] range, but the record name
  # still ends in ":rate1m". Presumably the name is kept so existing consumers
  # of the series keep working; confirm, and if so be aware that the suffix no
  # longer reflects the window actually used.
  - name: "mimir_ingester_rules" - name: "mimir_ingester_rules"
    rules: rules:
      - expr: "| - expr: "sum by(cluster, namespace, pod) (rate(cortex_ingester_ingested_samples_total[5m]))"
        sum by(cluster, namespace, pod) (rate(cortex_ingester_ingested_samples_total[1m]))"
        record: "cluster_namespace_pod:cortex_ingester_ingested_samples_total:rate1m" record: "cluster_namespace_pod:cortex_ingester_ingested_samples_total:rate1m"

View File

@ -1,15 +1,15 @@
# Recording rules derived from the tempo_request_duration_seconds histogram, aggregated by
# cluster, namespace, job and route (plus `le` where bucket series are involved).
# Every row carries two copies of the same line; on the `expr` rows the copies differ only in
# the rate() window ([1m] in the first copy, [5m] in the second). The record names do not
# encode the window, so they are identical in both copies.
groups: groups:
- name: "tempo_rules" - name: "tempo_rules"
rules: rules:
# 0.99 quantile of request duration, estimated from the bucket rates.
- expr: "histogram_quantile(0.99, sum(rate(tempo_request_duration_seconds_bucket[1m])) by (le, cluster, namespace, job, route))" - expr: "histogram_quantile(0.99, sum(rate(tempo_request_duration_seconds_bucket[5m])) by (le, cluster, namespace, job, route))"
record: "cluster_namespace_job_route:tempo_request_duration_seconds:99quantile" record: "cluster_namespace_job_route:tempo_request_duration_seconds:99quantile"
# 0.50 quantile (median) of request duration, estimated from the bucket rates.
- expr: "histogram_quantile(0.50, sum(rate(tempo_request_duration_seconds_bucket[1m])) by (le, cluster, namespace, job, route))" - expr: "histogram_quantile(0.50, sum(rate(tempo_request_duration_seconds_bucket[5m])) by (le, cluster, namespace, job, route))"
record: "cluster_namespace_job_route:tempo_request_duration_seconds:50quantile" record: "cluster_namespace_job_route:tempo_request_duration_seconds:50quantile"
# Mean request duration: rate of the _sum series divided by rate of the _count series.
- expr: "sum(rate(tempo_request_duration_seconds_sum[1m])) by (cluster, namespace, job, route) / sum(rate(tempo_request_duration_seconds_count[1m])) by (cluster, namespace, job, route)" - expr: "sum(rate(tempo_request_duration_seconds_sum[5m])) by (cluster, namespace, job, route) / sum(rate(tempo_request_duration_seconds_count[5m])) by (cluster, namespace, job, route)"
record: "cluster_namespace_job_route:tempo_request_duration_seconds:avg" record: "cluster_namespace_job_route:tempo_request_duration_seconds:avg"
# Summed rates of the _bucket, _sum and _count series, one rule each.
- expr: "sum(rate(tempo_request_duration_seconds_bucket[1m])) by (le, cluster, namespace, job, route)" - expr: "sum(rate(tempo_request_duration_seconds_bucket[5m])) by (le, cluster, namespace, job, route)"
record: "cluster_namespace_job_route:tempo_request_duration_seconds_bucket:sum_rate" record: "cluster_namespace_job_route:tempo_request_duration_seconds_bucket:sum_rate"
- expr: "sum(rate(tempo_request_duration_seconds_sum[1m])) by (cluster, namespace, job, route)" - expr: "sum(rate(tempo_request_duration_seconds_sum[5m])) by (cluster, namespace, job, route)"
record: "cluster_namespace_job_route:tempo_request_duration_seconds_sum:sum_rate" record: "cluster_namespace_job_route:tempo_request_duration_seconds_sum:sum_rate"
- expr: "sum(rate(tempo_request_duration_seconds_count[1m])) by (cluster, namespace, job, route)" - expr: "sum(rate(tempo_request_duration_seconds_count[5m])) by (cluster, namespace, job, route)"
record: "cluster_namespace_job_route:tempo_request_duration_seconds_count:sum_rate" record: "cluster_namespace_job_route:tempo_request_duration_seconds_count:sum_rate"