Update the README and docs

Signed-off-by: Michel Hollands <michel.hollands@gmail.com>
Merge pull request #123 from grafana/cleanup_ci
2024-05-14 14:06:49 +01:00 · 2024-05-14 11:51:24 +01:00 · 2024-05-14 11:50:57 +01:00 · 2024-05-14 10:52:01 +01:00 · 2024-05-14 10:50:12 +01:00 · 2024-05-14 10:48:45 +01:00
44 changed files with 6222 additions and 6554 deletions
--- a/.github/configs/cluster-config.yaml
+++ b/.github/configs/cluster-config.yaml
@@ -0,0 +1,19 @@
+apiVersion: kind.x-k8s.io/v1alpha4  # kind cluster definition: one control-plane node plus one worker
+kind: Cluster
+nodes:  # one list entry per node
+  - role: control-plane
+    kubeadmConfigPatches:  # literal patches; they bind the listed endpoints to 0.0.0.0 (presumably for scraping - confirm)
+      - |
+        kind: ClusterConfiguration
+        controllerManager:
+          extraArgs:
+            bind-address: 0.0.0.0
+            secure-port: "10257"
+        scheduler:
+          extraArgs:
+            bind-address: 0.0.0.0
+            secure-port: "10259"
+      - |
+        kind: KubeProxyConfiguration
+        metricsBindAddress: 0.0.0.0:10249
+  - role: worker  # no node-specific patches
--- a/.github/configs/updatecli.d/grafana.yaml
+++ b/.github/configs/updatecli.d/grafana.yaml
@@ -0,0 +1,30 @@
+name: Bump grafana version specified in the values.yaml
+sources:  # where the candidate version comes from
+    latestGrafanaRelease:
+        name: Get latest grafana release on Github
+        kind: githubrelease
+        spec:
+            owner: grafana
+            repository: grafana
+            token: '{{ requiredEnv "UPDATECLI_GITHUB_TOKEN" }}'  # must be set in the environment
+            versionfilter:
+                kind: latest
+        transformers:
+          - trimprefix: "v"  # drop the leading "v" from the release tag
+conditions:  # checks tied to the source value
+    grafanaImagePublished:
+        name: Ensure the latest Grafana is published on DockerHub
+        kind: dockerimage
+        sourceid: latestGrafanaRelease  # was `source-id`; `sourceid` is the documented key, as used by the target
+        spec:
+            image: "grafana/grafana"
+targets:  # what gets rewritten
+    grafana:
+        name: Update Grafana version in values.yaml
+        kind: helmchart
+        spec:
+            file: values.yaml
+            key: $.grafana.version
+            name: charts/meta-monitoring
+            versionincrement: none  # NOTE(review): presumably leaves the chart's own version untouched - confirm
+        sourceid: latestGrafanaRelease
--- a/.github/workflows/check-for-dependency-updates.yaml
+++ b/.github/workflows/check-for-dependency-updates.yaml
@@ -16,8 +16,8 @@ env:
    UPDATECLI_GITHUB_TOKEN: "${{ secrets.GITHUB_TOKEN }}"

 jobs:
-    updateLoki:
-        name: Update the Loki subchart
+    updateVersions:
+        name: Update the subcharts
        runs-on: "ubuntu-latest"
        steps:
            - name: Checkout
@@ -26,7 +26,7 @@ jobs:
            - name: Install Updatecli
              uses: updatecli/updatecli-action@v2

-            - name: Run Updatecli
+            - name: Run Updatecli for Loki
              id: update-loki
              run: |
                updatecli apply --config ${UPDATECLI_CONFIG_DIR}/loki.yaml
@@ -34,31 +34,7 @@ jobs:
                  echo "changed=true" >> "${GITHUB_OUTPUT}"
                fi

-            - name: Create pull request
-              if: steps.update-loki.outputs.changed == 'true'
-              uses: peter-evans/create-pull-request@v5
-              with:
-                title: "[dependency] Update the Loki subchart"
-                body: "Updates the Loki subchart"
-                base: main
-                author: "${{ github.actor }} <${{ github.actor }}@users.noreply.github.com>"
-                committer: "GitHub <noreply@github.com>"
-                commit-message: Update loki
-                labels: dependencies
-                branch: chore/update-loki
-                delete-branch: true
-
-    updateGrafanaAlloy:
-        name: Update the Grafana Alloy subchart
-        runs-on: "ubuntu-latest"
-        steps:
-            - name: Checkout
-              uses: actions/checkout@v2
-
-            - name: Install Updatecli
-              uses: updatecli/updatecli-action@v2
-
-            - name: Run Updatecli
+            - name: Run Updatecli for Alloy
              id: update-grafana-alloy
              run: |
                updatecli apply --config ${UPDATECLI_CONFIG_DIR}/alloy.yaml
@@ -66,31 +42,7 @@ jobs:
                  echo "changed=true" >> "${GITHUB_OUTPUT}"
                fi

-            - name: Create pull request
-              if: steps.update-grafana-alloy.outputs.changed == 'true'
-              uses: peter-evans/create-pull-request@v5
-              with:
-                title: "[dependency] Update the Grafana Alloy subchart"
-                body: "Updates the Grafana Alloy subchart"
-                base: main
-                author: "${{ github.actor }} <${{ github.actor }}@users.noreply.github.com>"
-                committer: "GitHub <noreply@github.com>"
-                commit-message: Update Grafana Alloy
-                labels: dependencies
-                branch: chore/update-grafana-alloy
-                delete-branch: true
-
-    updateMimirDistributed:
-        name: Update the Mimir Distributed subchart
-        runs-on: "ubuntu-latest"
-        steps:
-            - name: Checkout
-              uses: actions/checkout@v2
-
-            - name: Install Updatecli
-              uses: updatecli/updatecli-action@v2
-
-            - name: Run Updatecli
+            - name: Run Updatecli for Mimir
              id: update-mimir-distributed
              run: |
                updatecli apply --config ${UPDATECLI_CONFIG_DIR}/mimir-distributed.yaml
@@ -98,31 +50,7 @@ jobs:
                  echo "changed=true" >> "${GITHUB_OUTPUT}"
                fi

-            - name: Create pull request
-              if: steps.update-mimir-distributed.outputs.changed == 'true'
-              uses: peter-evans/create-pull-request@v5
-              with:
-                title: "[dependency] Update the Mimir Distributed subchart"
-                body: "Updates the Mimir Distributed subchart"
-                base: main
-                author: "${{ github.actor }} <${{ github.actor }}@users.noreply.github.com>"
-                committer: "GitHub <noreply@github.com>"
-                commit-message: Update Mimir Distributed
-                labels: dependencies
-                branch: chore/update-mimir-distributed
-                delete-branch: true
-
-    updateTempoDistributed:
-        name: Update the Tempo Distributed subchart
-        runs-on: "ubuntu-latest"
-        steps:
-            - name: Checkout
-              uses: actions/checkout@v2
-
-            - name: Install Updatecli
-              uses: updatecli/updatecli-action@v2
-
-            - name: Run Updatecli
+            - name: Run Updatecli for Tempo
              id: update-tempo-distributed
              run: |
                updatecli apply --config ${UPDATECLI_CONFIG_DIR}/tempo-distributed.yaml
@@ -130,31 +58,7 @@ jobs:
                  echo "changed=true" >> "${GITHUB_OUTPUT}"
                fi

-            - name: Create pull request
-              if: steps.update-tempo-distributed.outputs.changed == 'true'
-              uses: peter-evans/create-pull-request@v5
-              with:
-                title: "[dependency] Update the Tempo Distributed subchart"
-                body: "Updates the tempo Distributed subchart"
-                base: main
-                author: "${{ github.actor }} <${{ github.actor }}@users.noreply.github.com>"
-                committer: "GitHub <noreply@github.com>"
-                commit-message: Update Tempo Distributed
-                labels: dependencies
-                branch: chore/update-tempo-distributed
-                delete-branch: true
-
-    updateMinio:
-        name: Update the Minio subchart
-        runs-on: "ubuntu-latest"
-        steps:
-            - name: Checkout
-              uses: actions/checkout@v2
-
-            - name: Install Updatecli
-              uses: updatecli/updatecli-action@v2
-
-            - name: Run Updatecli
+            - name: Run Updatecli for Minio
              id: update-minio
              run: |
                updatecli apply --config ${UPDATECLI_CONFIG_DIR}/minio.yaml
@@ -163,15 +67,47 @@ jobs:
                fi

            - name: Create pull request
-              if: steps.update-minio.outputs.changed == 'true'
+              if: steps.update-loki.outputs.changed == 'true' || steps.update-grafana-alloy.outputs.changed == 'true' || steps.update-mimir-distributed.outputs.changed == 'true' || steps.update-tempo-distributed.outputs.changed == 'true' || steps.update-minio.outputs.changed == 'true'
              uses: peter-evans/create-pull-request@v5
              with:
-                title: "[dependency] Update the Minio subchart"
-                body: "Updates the Minio subchart"
+                title: "[dependency] Update the subcharts"
+                body: "Updates the subcharts"
                base: main
                author: "${{ github.actor }} <${{ github.actor }}@users.noreply.github.com>"
                committer: "GitHub <noreply@github.com>"
-                commit-message: Update minio
+                commit-message: Update dependencies
+                labels: dependencies
+                branch: chore/update-dependencies
+                delete-branch: true
+
+    updateGrafana:  # separate job: the Grafana bump gets its own pull request
+        name: Update the Grafana version
+        runs-on: "ubuntu-latest"
+        steps:
+            - name: Checkout
+              uses: actions/checkout@v2
+
+            - name: Install Updatecli
+              uses: updatecli/updatecli-action@v2
+
+            - name: Run Updatecli
+              id: update-grafana  # referenced by the "Create pull request" condition below
+              run: |
+                updatecli apply --config ${UPDATECLI_CONFIG_DIR}/grafana.yaml
+                if ! git diff --exit-code > /dev/null; then
+                  echo "changed=true" >> "${GITHUB_OUTPUT}"
+                fi
+
+            - name: Create pull request
+              if: steps.update-grafana.outputs.changed == 'true'  # only when the working tree was modified
+              uses: peter-evans/create-pull-request@v5
+              with:
+                title: "[dependency] Update the Grafana version"
+                body: "Updates the Grafana version"
+                base: main
+                author: "${{ github.actor }} <${{ github.actor }}@users.noreply.github.com>"
+                committer: "GitHub <noreply@github.com>"
+                commit-message: Update Grafana version
                labels: dependencies
-                branch: chore/update-minio
+                branch: chore/update-grafana
                delete-branch: true
--- a/.github/workflows/helm-ci.yml
+++ b/.github/workflows/helm-ci.yml
@@ -1,6 +1,7 @@
 ---
 name: helm-ci
 on:
+  workflow_dispatch:
  pull_request:
    paths:
      - "charts/meta-monitoring/**"
@@ -24,7 +25,7 @@ jobs:
  #   runs-on: ubuntu-latest
  #   steps:
  #     - name: Checkout
-  #       uses: actions/checkout@v3
+  #       uses: actions/checkout@v4
  #       with:
  #         fetch-depth: 0

@@ -38,10 +39,10 @@ jobs:
  #     - name: Set up Python
  #       uses: actions/setup-python@v4
  #       with:
-  #         python-version: 3.7
+  #         python-version: 3.9

  #     - name: Set up chart-testing
-  #       uses: helm/chart-testing-action@v2.4.0
+  #       uses: helm/chart-testing-action@v2

  #     - name: Run chart-testing (list-changed)
  #       id: list-changed
@@ -55,10 +56,10 @@ jobs:
  #       run: ct lint --config "${CT_CONFIGFILE}" --check-version-increment=false

  #     - name: Create kind cluster
-  #       uses: helm/kind-action@v1.8.0
+  #       uses: helm/kind-action@v1
  #       if: steps.list-changed.outputs.changed == 'true'
  #       with:
-  #         config: tools/kind.config
+  #         config: "${{ github.workspace }}/.github/configs/cluster-config.yaml"

  #     - name: Run chart-testing (install)
  #       run: |
--- a/README.md
+++ b/README.md
@@ -1,8 +1,6 @@
 # meta-monitoring-chart

-This is a meta-monitoring chart for Loki.
-
-Note that this is pre-production software at the moment.
+This is a meta-monitoring chart for Loki, specifically Loki installed via the Loki Helm chart.

 ## Local and cloud modes

@@ -11,19 +9,15 @@ to small Loki, Mimir and Tempo installations running in the meta-monitoring name

 ![local mode](docs/images/Meta%20monitoring%20local.png)

-To enable local mode set `local.<logs|metrics|traces>.enabled` to true.
-
 In the cloud mode the logs, metrics and/or traces are sent to Grafana Cloud.

 ![cloud mode](docs/images/Meta%20monitoring%20cloud.png)

-To enable cloud mode set `cloud.<logs|metrics|traces>.enabled` to true. The `endpoint`, `username` and `password` settings for your Grafana Cloud logs, metrics and traces instances have to be filled in as well.
-
 Both modes can be enabled at the same time. Cloud mode is preferred.

 ## Installation

-For more instructions including how to update the chart go to the [installation](docs/installation.md) page.
+For more instructions, including how to install the chart, go to the [installation](docs/installation.md) page.

 ## Supported features

@@ -33,8 +27,7 @@ For more instructions including how to update the chart go to the [installation]
 - Specify PII regexes that are applied to logs before they are sent to Loki (cloud or local). The capture group in the regex is replaced with *****.
 - a Grafana instance is installed (when local mode is used) with the relevant datasources installed. The following dashboards are installed:
  - logs dashboards
-  - agent dashboards
- Retention is set to 24 hours
+  - Alloy dashboards

 Most of these features are enabled by default. See the values.yaml file for how to enable/disable them.

@@ -42,8 +35,7 @@ Most of these features are enabled by default. See the values.yaml file for how

 - This has not been tested on Openshift yet.
 - The underlying Loki, Mimir and Tempo are at the default size installed by the Helm chart. This might need changing when monitoring bigger Loki, Mimir or Tempo installations.
- MinIO is used as storage at the moment with a limited retention. At the moment this chart cannot be used for monitoring over longer periods.
- Agent self monitoring is not done at the moment.
+- MinIO is currently used as storage for the local mode, with limited retention, so this chart cannot yet be used for monitoring over longer periods.

 ## Developer help topics

--- a/charts/meta-monitoring/Chart.lock
+++ b/charts/meta-monitoring/Chart.lock
@@ -1,18 +1,18 @@
 dependencies:
 - name: loki
  repository: https://grafana.github.io/helm-charts
-  version: 6.5.0
+  version: 6.5.2
 - name: alloy
  repository: https://grafana.github.io/helm-charts
-  version: 0.1.1
+  version: 0.2.0
 - name: mimir-distributed
  repository: https://grafana.github.io/helm-charts
  version: 5.3.0
 - name: tempo-distributed
  repository: https://grafana.github.io/helm-charts
-  version: 1.9.7
+  version: 1.9.9
 - name: minio
  repository: https://charts.min.io
  version: 5.2.0
-digest: sha256:297f462812b6436834d8b82a028840bd55bd9e935b3d0a3e8206ac54a113be01
-generated: "2024-05-07T09:22:16.438693788Z"
+digest: sha256:34e3ee022d624940e12328761d08bffaaa44bf3b2f5ac11bdf18a7818cfc6f46
+generated: "2024-05-14T07:03:44.771380539Z"
--- a/charts/meta-monitoring/Chart.yaml
+++ b/charts/meta-monitoring/Chart.yaml
@@ -13,7 +13,7 @@ type: application
 # This is the chart version. This version number should be incremented each time you make changes
 # to the chart and its templates, including the app version.
 # Versions are expected to follow Semantic Versioning (https://semver.org/)
-version: 0.0.2
+version: 0.0.3
 # This is the version number of the application being deployed. This version number should be
 # incremented each time you make changes to the application. Versions are not expected to
 # follow Semantic Versioning. They should reflect the version the application is using.
@@ -22,18 +22,18 @@ appVersion: "0.0.1"
 dependencies:
 - name: loki
  repository: https://grafana.github.io/helm-charts
-  version: 6.5.0
+  version: 6.5.2
  condition: local.logs.enabled
 - name: alloy
  repository: https://grafana.github.io/helm-charts
-  version: 0.1.1
+  version: 0.2.0
 - name: mimir-distributed
  repository: https://grafana.github.io/helm-charts
  version: 5.3.0
  condition: local.metrics.enabled
 - name: tempo-distributed
  repository: https://grafana.github.io/helm-charts
-  version: 1.9.7
+  version: 1.9.9
  condition: local.traces.enabled
 - name: minio
  repository: https://charts.min.io
--- a/charts/meta-monitoring/charts/alloy-0.1.1.tgz
+++ b/charts/meta-monitoring/charts/alloy-0.1.1.tgz
--- a/charts/meta-monitoring/charts/alloy-0.2.0.tgz
+++ b/charts/meta-monitoring/charts/alloy-0.2.0.tgz
--- a/charts/meta-monitoring/charts/loki-6.5.0.tgz
+++ b/charts/meta-monitoring/charts/loki-6.5.0.tgz
--- a/charts/meta-monitoring/charts/loki-6.5.2.tgz
+++ b/charts/meta-monitoring/charts/loki-6.5.2.tgz
--- a/charts/meta-monitoring/charts/tempo-distributed-1.9.7.tgz
+++ b/charts/meta-monitoring/charts/tempo-distributed-1.9.7.tgz
--- a/charts/meta-monitoring/charts/tempo-distributed-1.9.9.tgz
+++ b/charts/meta-monitoring/charts/tempo-distributed-1.9.9.tgz
--- a/charts/meta-monitoring/src/dashboards/agent-logs-pipeline.json
+++ b/charts/meta-monitoring/src/dashboards/agent-logs-pipeline.json
--- a/charts/meta-monitoring/src/dashboards/agent-operational.json
+++ b/charts/meta-monitoring/src/dashboards/agent-operational.json
--- a/charts/meta-monitoring/src/dashboards/agent-remote-write.json
+++ b/charts/meta-monitoring/src/dashboards/agent-remote-write.json
--- a/charts/meta-monitoring/src/dashboards/agent-tracing-pipeline.json
+++ b/charts/meta-monitoring/src/dashboards/agent-tracing-pipeline.json
--- a/charts/meta-monitoring/src/dashboards/agent.json
+++ b/charts/meta-monitoring/src/dashboards/agent.json
@@ -1,786 +0,0 @@
-{
-   "annotations": {
-      "list": [ ]
-   },
-   "editable": true,
-   "gnetId": null,
-   "graphTooltip": 0,
-   "hideControls": false,
-   "links": [ ],
-   "refresh": "30s",
-   "rows": [
-      {
-         "collapse": false,
-         "height": "250px",
-         "panels": [
-            {
-               "aliasColors": { },
-               "bars": false,
-               "dashLength": 10,
-               "dashes": false,
-               "datasource": "$datasource",
-               "fill": 1,
-               "id": 1,
-               "legend": {
-                  "avg": false,
-                  "current": false,
-                  "max": false,
-                  "min": false,
-                  "show": true,
-                  "total": false,
-                  "values": false
-               },
-               "lines": true,
-               "linewidth": 1,
-               "links": [ ],
-               "nullPointMode": "null as zero",
-               "percentage": false,
-               "pointradius": 5,
-               "points": false,
-               "renderer": "flot",
-               "seriesOverrides": [ ],
-               "spaceLength": 10,
-               "span": 12,
-               "stack": false,
-               "steppedLine": false,
-               "styles": [
-                  {
-                     "alias": "Time",
-                     "dateFormat": "YYYY-MM-DD HH:mm:ss",
-                     "pattern": "Time",
-                     "type": "hidden"
-                  },
-                  {
-                     "alias": "Count",
-                     "colorMode": null,
-                     "colors": [ ],
-                     "dateFormat": "YYYY-MM-DD HH:mm:ss",
-                     "decimals": 2,
-                     "link": false,
-                     "linkTargetBlank": false,
-                     "linkTooltip": "Drill down",
-                     "linkUrl": "",
-                     "pattern": "Value #A",
-                     "thresholds": [ ],
-                     "type": "hidden",
-                     "unit": "short"
-                  },
-                  {
-                     "alias": "Uptime",
-                     "colorMode": null,
-                     "colors": [ ],
-                     "dateFormat": "YYYY-MM-DD HH:mm:ss",
-                     "decimals": 2,
-                     "link": false,
-                     "linkTargetBlank": false,
-                     "linkTooltip": "Drill down",
-                     "linkUrl": "",
-                     "pattern": "Value #B",
-                     "thresholds": [ ],
-                     "type": "number",
-                     "unit": "short"
-                  },
-                  {
-                     "alias": "Container",
-                     "colorMode": null,
-                     "colors": [ ],
-                     "dateFormat": "YYYY-MM-DD HH:mm:ss",
-                     "decimals": 2,
-                     "link": false,
-                     "linkTargetBlank": false,
-                     "linkTooltip": "Drill down",
-                     "linkUrl": "",
-                     "pattern": "container",
-                     "thresholds": [ ],
-                     "type": "number",
-                     "unit": "short"
-                  },
-                  {
-                     "alias": "Pod",
-                     "colorMode": null,
-                     "colors": [ ],
-                     "dateFormat": "YYYY-MM-DD HH:mm:ss",
-                     "decimals": 2,
-                     "link": false,
-                     "linkTargetBlank": false,
-                     "linkTooltip": "Drill down",
-                     "linkUrl": "",
-                     "pattern": "pod",
-                     "thresholds": [ ],
-                     "type": "number",
-                     "unit": "short"
-                  },
-                  {
-                     "alias": "Version",
-                     "colorMode": null,
-                     "colors": [ ],
-                     "dateFormat": "YYYY-MM-DD HH:mm:ss",
-                     "decimals": 2,
-                     "link": false,
-                     "linkTargetBlank": false,
-                     "linkTooltip": "Drill down",
-                     "linkUrl": "",
-                     "pattern": "version",
-                     "thresholds": [ ],
-                     "type": "number",
-                     "unit": "short"
-                  },
-                  {
-                     "alias": "",
-                     "colorMode": null,
-                     "colors": [ ],
-                     "dateFormat": "YYYY-MM-DD HH:mm:ss",
-                     "decimals": 2,
-                     "pattern": "/.*/",
-                     "thresholds": [ ],
-                     "type": "string",
-                     "unit": "short"
-                  }
-               ],
-               "targets": [
-                  {
-                     "expr": "count by (pod, container, version) (agent_build_info{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"$container\"})",
-                     "format": "table",
-                     "instant": true,
-                     "intervalFactor": 2,
-                     "legendFormat": "",
-                     "refId": "A",
-                     "step": 10
-                  },
-                  {
-                     "expr": "max by (pod, container) (time() - process_start_time_seconds{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"$container\"})",
-                     "format": "table",
-                     "instant": true,
-                     "intervalFactor": 2,
-                     "legendFormat": "",
-                     "refId": "B",
-                     "step": 10
-                  }
-               ],
-               "thresholds": [ ],
-               "timeFrom": null,
-               "timeShift": null,
-               "title": "Agent Stats",
-               "tooltip": {
-                  "shared": true,
-                  "sort": 2,
-                  "value_type": "individual"
-               },
-               "transform": "table",
-               "type": "table",
-               "xaxis": {
-                  "buckets": null,
-                  "mode": "time",
-                  "name": null,
-                  "show": true,
-                  "values": [ ]
-               },
-               "yaxes": [
-                  {
-                     "format": "short",
-                     "label": null,
-                     "logBase": 1,
-                     "max": null,
-                     "min": 0,
-                     "show": true
-                  },
-                  {
-                     "format": "short",
-                     "label": null,
-                     "logBase": 1,
-                     "max": null,
-                     "min": null,
-                     "show": false
-                  }
-               ]
-            }
-         ],
-         "repeat": null,
-         "repeatIteration": null,
-         "repeatRowId": null,
-         "showTitle": true,
-         "title": "Agent Stats",
-         "titleSize": "h6"
-      },
-      {
-         "collapse": false,
-         "height": "250px",
-         "panels": [
-            {
-               "aliasColors": { },
-               "bars": false,
-               "dashLength": 10,
-               "dashes": false,
-               "datasource": "$datasource",
-               "fill": 1,
-               "id": 2,
-               "legend": {
-                  "avg": false,
-                  "current": false,
-                  "max": false,
-                  "min": false,
-                  "show": true,
-                  "total": false,
-                  "values": false
-               },
-               "lines": true,
-               "linewidth": 1,
-               "links": [ ],
-               "nullPointMode": "null as zero",
-               "percentage": false,
-               "pointradius": 5,
-               "points": false,
-               "renderer": "flot",
-               "seriesOverrides": [ ],
-               "spaceLength": 10,
-               "span": 6,
-               "stack": false,
-               "steppedLine": false,
-               "targets": [
-                  {
-                     "expr": "sum(rate(prometheus_target_sync_length_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"$container\"}[5m])) by (pod, scrape_job) * 1e3",
-                     "format": "time_series",
-                     "intervalFactor": 2,
-                     "legendFormat": "{{pod}}/{{scrape_job}}",
-                     "legendLink": null,
-                     "step": 10
-                  }
-               ],
-               "thresholds": [ ],
-               "timeFrom": null,
-               "timeShift": null,
-               "title": "Target Sync",
-               "tooltip": {
-                  "shared": true,
-                  "sort": 2,
-                  "value_type": "individual"
-               },
-               "type": "graph",
-               "xaxis": {
-                  "buckets": null,
-                  "mode": "time",
-                  "name": null,
-                  "show": true,
-                  "values": [ ]
-               },
-               "yaxes": [
-                  {
-                     "format": "ms",
-                     "label": null,
-                     "logBase": 1,
-                     "max": null,
-                     "min": 0,
-                     "show": true
-                  },
-                  {
-                     "format": "short",
-                     "label": null,
-                     "logBase": 1,
-                     "max": null,
-                     "min": null,
-                     "show": false
-                  }
-               ]
-            },
-            {
-               "aliasColors": { },
-               "bars": false,
-               "dashLength": 10,
-               "dashes": false,
-               "datasource": "$datasource",
-               "fill": 10,
-               "id": 3,
-               "legend": {
-                  "avg": false,
-                  "current": false,
-                  "max": false,
-                  "min": false,
-                  "show": true,
-                  "total": false,
-                  "values": false
-               },
-               "lines": true,
-               "linewidth": 0,
-               "links": [ ],
-               "nullPointMode": "null as zero",
-               "percentage": false,
-               "pointradius": 5,
-               "points": false,
-               "renderer": "flot",
-               "seriesOverrides": [ ],
-               "spaceLength": 10,
-               "span": 6,
-               "stack": true,
-               "steppedLine": false,
-               "targets": [
-                  {
-                     "expr": "sum by (pod) (prometheus_sd_discovered_targets{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"$container\"})",
-                     "format": "time_series",
-                     "intervalFactor": 2,
-                     "legendFormat": "{{pod}}",
-                     "legendLink": null,
-                     "step": 10
-                  }
-               ],
-               "thresholds": [ ],
-               "timeFrom": null,
-               "timeShift": null,
-               "title": "Targets",
-               "tooltip": {
-                  "shared": true,
-                  "sort": 2,
-                  "value_type": "individual"
-               },
-               "type": "graph",
-               "xaxis": {
-                  "buckets": null,
-                  "mode": "time",
-                  "name": null,
-                  "show": true,
-                  "values": [ ]
-               },
-               "yaxes": [
-                  {
-                     "format": "short",
-                     "label": null,
-                     "logBase": 1,
-                     "max": null,
-                     "min": 0,
-                     "show": true
-                  },
-                  {
-                     "format": "short",
-                     "label": null,
-                     "logBase": 1,
-                     "max": null,
-                     "min": null,
-                     "show": false
-                  }
-               ]
-            }
-         ],
-         "repeat": null,
-         "repeatIteration": null,
-         "repeatRowId": null,
-         "showTitle": true,
-         "title": "Prometheus Discovery",
-         "titleSize": "h6"
-      },
-      {
-         "collapse": false,
-         "height": "250px",
-         "panels": [
-            {
-               "aliasColors": { },
-               "bars": false,
-               "dashLength": 10,
-               "dashes": false,
-               "datasource": "$datasource",
-               "fill": 1,
-               "id": 4,
-               "legend": {
-                  "avg": false,
-                  "current": false,
-                  "max": false,
-                  "min": false,
-                  "show": true,
-                  "total": false,
-                  "values": false
-               },
-               "lines": true,
-               "linewidth": 1,
-               "links": [ ],
-               "nullPointMode": "null as zero",
-               "percentage": false,
-               "pointradius": 5,
-               "points": false,
-               "renderer": "flot",
-               "seriesOverrides": [ ],
-               "spaceLength": 10,
-               "span": 4,
-               "stack": false,
-               "steppedLine": false,
-               "targets": [
-                  {
-                     "expr": "rate(prometheus_target_interval_length_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"$container\"}[5m])\n/\nrate(prometheus_target_interval_length_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"$container\"}[5m])\n* 1e3\n",
-                     "format": "time_series",
-                     "intervalFactor": 2,
-                     "legendFormat": "{{pod}} {{interval}} configured",
-                     "legendLink": null,
-                     "step": 10
-                  }
-               ],
-               "thresholds": [ ],
-               "timeFrom": null,
-               "timeShift": null,
-               "title": "Average Scrape Interval Duration",
-               "tooltip": {
-                  "shared": true,
-                  "sort": 2,
-                  "value_type": "individual"
-               },
-               "type": "graph",
-               "xaxis": {
-                  "buckets": null,
-                  "mode": "time",
-                  "name": null,
-                  "show": true,
-                  "values": [ ]
-               },
-               "yaxes": [
-                  {
-                     "format": "ms",
-                     "label": null,
-                     "logBase": 1,
-                     "max": null,
-                     "min": 0,
-                     "show": true
-                  },
-                  {
-                     "format": "short",
-                     "label": null,
-                     "logBase": 1,
-                     "max": null,
-                     "min": null,
-                     "show": false
-                  }
-               ]
-            },
-            {
-               "aliasColors": { },
-               "bars": false,
-               "dashLength": 10,
-               "dashes": false,
-               "datasource": "$datasource",
-               "fill": 10,
-               "id": 5,
-               "legend": {
-                  "avg": false,
-                  "current": false,
-                  "max": false,
-                  "min": false,
-                  "show": true,
-                  "total": false,
-                  "values": false
-               },
-               "lines": true,
-               "linewidth": 0,
-               "links": [ ],
-               "nullPointMode": "null as zero",
-               "percentage": false,
-               "pointradius": 5,
-               "points": false,
-               "renderer": "flot",
-               "seriesOverrides": [ ],
-               "spaceLength": 10,
-               "span": 4,
-               "stack": true,
-               "steppedLine": false,
-               "targets": [
-                  {
-                     "expr": "sum by (job) (rate(prometheus_target_scrapes_exceeded_sample_limit_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"$container\"}[1m]))",
-                     "format": "time_series",
-                     "intervalFactor": 2,
-                     "legendFormat": "exceeded sample limit: {{job}}",
-                     "legendLink": null,
-                     "step": 10
-                  },
-                  {
-                     "expr": "sum by (job) (rate(prometheus_target_scrapes_sample_duplicate_timestamp_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"$container\"}[1m]))",
-                     "format": "time_series",
-                     "intervalFactor": 2,
-                     "legendFormat": "duplicate timestamp: {{job}}",
-                     "legendLink": null,
-                     "step": 10
-                  },
-                  {
-                     "expr": "sum by (job) (rate(prometheus_target_scrapes_sample_out_of_bounds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"$container\"}[1m]))",
-                     "format": "time_series",
-                     "intervalFactor": 2,
-                     "legendFormat": "out of bounds: {{job}}",
-                     "legendLink": null,
-                     "step": 10
-                  },
-                  {
-                     "expr": "sum by (job) (rate(prometheus_target_scrapes_sample_out_of_order_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"$container\"}[1m]))",
-                     "format": "time_series",
-                     "intervalFactor": 2,
-                     "legendFormat": "out of order: {{job}}",
-                     "legendLink": null,
-                     "step": 10
-                  }
-               ],
-               "thresholds": [ ],
-               "timeFrom": null,
-               "timeShift": null,
-               "title": "Scrape failures",
-               "tooltip": {
-                  "shared": true,
-                  "sort": 2,
-                  "value_type": "individual"
-               },
-               "type": "graph",
-               "xaxis": {
-                  "buckets": null,
-                  "mode": "time",
-                  "name": null,
-                  "show": true,
-                  "values": [ ]
-               },
-               "yaxes": [
-                  {
-                     "format": "short",
-                     "label": null,
-                     "logBase": 1,
-                     "max": null,
-                     "min": 0,
-                     "show": true
-                  },
-                  {
-                     "format": "short",
-                     "label": null,
-                     "logBase": 1,
-                     "max": null,
-                     "min": null,
-                     "show": false
-                  }
-               ]
-            },
-            {
-               "aliasColors": { },
-               "bars": false,
-               "dashLength": 10,
-               "dashes": false,
-               "datasource": "$datasource",
-               "fill": 10,
-               "id": 6,
-               "legend": {
-                  "avg": false,
-                  "current": false,
-                  "max": false,
-                  "min": false,
-                  "show": true,
-                  "total": false,
-                  "values": false
-               },
-               "lines": true,
-               "linewidth": 0,
-               "links": [ ],
-               "nullPointMode": "null as zero",
-               "percentage": false,
-               "pointradius": 5,
-               "points": false,
-               "renderer": "flot",
-               "seriesOverrides": [ ],
-               "spaceLength": 10,
-               "span": 4,
-               "stack": true,
-               "steppedLine": false,
-               "targets": [
-                  {
-                     "expr": "sum by (job, instance_group_name) (rate(agent_wal_samples_appended_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"$container\"}[5m]))",
-                     "format": "time_series",
-                     "intervalFactor": 2,
-                     "legendFormat": "{{job}} {{instance_group_name}}",
-                     "legendLink": null,
-                     "step": 10
-                  }
-               ],
-               "thresholds": [ ],
-               "timeFrom": null,
-               "timeShift": null,
-               "title": "Appended Samples",
-               "tooltip": {
-                  "shared": true,
-                  "sort": 2,
-                  "value_type": "individual"
-               },
-               "type": "graph",
-               "xaxis": {
-                  "buckets": null,
-                  "mode": "time",
-                  "name": null,
-                  "show": true,
-                  "values": [ ]
-               },
-               "yaxes": [
-                  {
-                     "format": "short",
-                     "label": null,
-                     "logBase": 1,
-                     "max": null,
-                     "min": 0,
-                     "show": true
-                  },
-                  {
-                     "format": "short",
-                     "label": null,
-                     "logBase": 1,
-                     "max": null,
-                     "min": null,
-                     "show": false
-                  }
-               ]
-            }
-         ],
-         "repeat": null,
-         "repeatIteration": null,
-         "repeatRowId": null,
-         "showTitle": true,
-         "title": "Prometheus Retrieval",
-         "titleSize": "h6"
-      }
-   ],
-   "schemaVersion": 14,
-   "style": "dark",
-   "tags": [
-      "grafana-agent-mixin"
-   ],
-   "templating": {
-      "list": [
-         {
-            "current": {
-               "text": "default",
-               "value": "default"
-            },
-            "hide": 0,
-            "label": "Data Source",
-            "name": "datasource",
-            "options": [ ],
-            "query": "prometheus",
-            "refresh": 1,
-            "regex": "",
-            "type": "datasource"
-         },
-         {
-            "allValue": ".+",
-            "current": {
-               "selected": true,
-               "text": "All",
-               "value": "$__all"
-            },
-            "datasource": "$datasource",
-            "hide": 0,
-            "includeAll": true,
-            "label": "cluster",
-            "multi": true,
-            "name": "cluster",
-            "options": [ ],
-            "query": "label_values(agent_build_info, cluster)",
-            "refresh": 1,
-            "regex": "",
-            "sort": 2,
-            "tagValuesQuery": "",
-            "tags": [ ],
-            "tagsQuery": "",
-            "type": "query",
-            "useTags": false
-         },
-         {
-            "allValue": ".+",
-            "current": {
-               "selected": true,
-               "text": "All",
-               "value": "$__all"
-            },
-            "datasource": "$datasource",
-            "hide": 0,
-            "includeAll": true,
-            "label": "namespace",
-            "multi": true,
-            "name": "namespace",
-            "options": [ ],
-            "query": "label_values(agent_build_info, namespace)",
-            "refresh": 1,
-            "regex": "",
-            "sort": 2,
-            "tagValuesQuery": "",
-            "tags": [ ],
-            "tagsQuery": "",
-            "type": "query",
-            "useTags": false
-         },
-         {
-            "allValue": ".+",
-            "current": {
-               "selected": true,
-               "text": "All",
-               "value": "$__all"
-            },
-            "datasource": "$datasource",
-            "hide": 0,
-            "includeAll": true,
-            "label": "container",
-            "multi": true,
-            "name": "container",
-            "options": [ ],
-            "query": "label_values(agent_build_info, container)",
-            "refresh": 1,
-            "regex": "",
-            "sort": 2,
-            "tagValuesQuery": "",
-            "tags": [ ],
-            "tagsQuery": "",
-            "type": "query",
-            "useTags": false
-         },
-         {
-            "allValue": "grafana-agent-.*",
-            "current": {
-               "selected": true,
-               "text": "All",
-               "value": "$__all"
-            },
-            "datasource": "$datasource",
-            "hide": 0,
-            "includeAll": true,
-            "label": "pod",
-            "multi": true,
-            "name": "pod",
-            "options": [ ],
-            "query": "label_values(agent_build_info{container=~\"$container\"}, pod)",
-            "refresh": 1,
-            "regex": "",
-            "sort": 2,
-            "tagValuesQuery": "",
-            "tags": [ ],
-            "tagsQuery": "",
-            "type": "query",
-            "useTags": false
-         }
-      ]
-   },
-   "time": {
-      "from": "now-1h",
-      "to": "now"
-   },
-   "timepicker": {
-      "refresh_intervals": [
-         "5s",
-         "10s",
-         "30s",
-         "1m",
-         "5m",
-         "15m",
-         "30m",
-         "1h",
-         "2h",
-         "1d"
-      ],
-      "time_options": [
-         "5m",
-         "15m",
-         "1h",
-         "6h",
-         "12h",
-         "24h",
-         "2d",
-         "7d",
-         "30d"
-      ]
-   },
-   "timezone": "",
-   "title": "Agent",
-   "uid": "",
-   "version": 0
-}
--- a/charts/meta-monitoring/src/dashboards/alloy-cluster-node.json
+++ b/charts/meta-monitoring/src/dashboards/alloy-cluster-node.json
--- a/charts/meta-monitoring/src/dashboards/alloy-cluster-overview.json
+++ b/charts/meta-monitoring/src/dashboards/alloy-cluster-overview.json
@@ -0,0 +1,540 @@
+{
+    "annotations": {
+      "list": [
+        {
+          "builtIn": 1,
+          "datasource": {
+            "type": "grafana",
+            "uid": "-- Grafana --"
+          },
+          "enable": true,
+          "hide": true,
+          "iconColor": "rgba(0, 211, 255, 1)",
+          "name": "Annotations & Alerts",
+          "type": "dashboard"
+        },
+        {
+          "datasource": "$loki_datasource",
+          "enable": true,
+          "expr": "{cluster=\"$cluster\", container=\"kube-diff-logger\"} | json | namespace_extracted=\"alloy\" | name_extracted=~\"alloy.*\"",
+          "iconColor": "rgba(0, 211, 255, 1)",
+          "instant": false,
+          "name": "Deployments",
+          "titleFormat": "{{cluster}}/{{namespace}}"
+        }
+      ]
+    },
+    "editable": true,
+    "fiscalYearStartMonth": 0,
+    "graphTooltip": 1,
+    "id": 27,
+    "links": [
+      {
+        "icon": "doc",
+        "targetBlank": true,
+        "title": "Documentation",
+        "tooltip": "Clustering documentation",
+        "type": "link",
+        "url": "https://grafana.com/docs/alloy/latest/reference/cli/run/#clustered-mode"
+      },
+      {
+        "asDropdown": true,
+        "icon": "external link",
+        "includeVars": true,
+        "keepTime": true,
+        "tags": [
+          "alloy-mixin"
+        ],
+        "targetBlank": false,
+        "title": "Dashboards",
+        "type": "dashboards"
+      }
+    ],
+    "panels": [
+      {
+        "datasource": "${datasource}",
+        "fieldConfig": {
+          "defaults": {
+            "mappings": [],
+            "thresholds": {
+              "mode": "absolute",
+              "steps": [
+                {
+                  "color": "green",
+                  "value": null
+                },
+                {
+                  "color": "red",
+                  "value": 80
+                }
+              ]
+            }
+          },
+          "overrides": []
+        },
+        "gridPos": {
+          "h": 9,
+          "w": 8,
+          "x": 0,
+          "y": 0
+        },
+        "id": 1,
+        "options": {
+          "colorMode": "value",
+          "graphMode": "area",
+          "justifyMode": "auto",
+          "orientation": "auto",
+          "reduceOptions": {
+            "calcs": [
+              "lastNotNull"
+            ],
+            "fields": "",
+            "values": false
+          },
+          "showPercentChange": false,
+          "textMode": "auto",
+          "wideLayout": true
+        },
+        "pluginVersion": "10.4.2",
+        "targets": [
+          {
+            "datasource": "${datasource}",
+            "expr": "count(cluster_node_info{cluster=\"$cluster\", namespace=\"$namespace\"})",
+            "instant": true,
+            "legendFormat": "__auto",
+            "range": false,
+            "refId": "A"
+          }
+        ],
+        "title": "Nodes",
+        "type": "stat"
+      },
+      {
+        "datasource": "${datasource}",
+        "description": "Nodes info.\n",
+        "fieldConfig": {
+          "defaults": {
+            "custom": {
+              "align": "auto",
+              "cellOptions": {
+                "type": "auto"
+              },
+              "inspect": false
+            },
+            "mappings": [],
+            "thresholds": {
+              "mode": "absolute",
+              "steps": [
+                {
+                  "color": "green",
+                  "value": null
+                },
+                {
+                  "color": "red",
+                  "value": 80
+                }
+              ]
+            }
+          },
+          "overrides": [
+            {
+              "matcher": {
+                "id": "byName",
+                "options": "Dashboard"
+              },
+              "properties": [
+                {
+                  "id": "mappings",
+                  "value": [
+                    {
+                      "options": {
+                        "1": {
+                          "index": 0,
+                          "text": "Link"
+                        }
+                      },
+                      "type": "value"
+                    }
+                  ]
+                },
+                {
+                  "id": "links",
+                  "value": [
+                    {
+                      "targetBlank": false,
+                      "title": "Detail dashboard for node",
+                      "url": "/d/4047e755d822da63c8158cde32ae4dce/alloy-cluster-node?var-instance=${__data.fields.instance}&var-datasource=${datasource}&var-loki_datasource=${loki_datasource}&var-cluster=${cluster}&var-namespace=${namespace}"
+                    }
+                  ]
+                }
+              ]
+            }
+          ]
+        },
+        "gridPos": {
+          "h": 9,
+          "w": 16,
+          "x": 8,
+          "y": 0
+        },
+        "id": 2,
+        "options": {
+          "cellHeight": "sm",
+          "footer": {
+            "countRows": false,
+            "fields": "",
+            "reducer": [
+              "sum"
+            ],
+            "show": false
+          },
+          "showHeader": true
+        },
+        "pluginVersion": "10.4.2",
+        "targets": [
+          {
+            "datasource": "${datasource}",
+            "expr": "cluster_node_info{cluster=\"$cluster\", namespace=\"$namespace\"}",
+            "format": "table",
+            "instant": true,
+            "legendFormat": "__auto",
+            "range": false,
+            "refId": "A"
+          }
+        ],
+        "title": "Node table",
+        "transformations": [
+          {
+            "id": "organize",
+            "options": {
+              "excludeByName": {
+                "Time": true,
+                "Value": false,
+                "__name__": true,
+                "cluster": true,
+                "namespace": true,
+                "state": false
+              },
+              "indexByName": {},
+              "renameByName": {
+                "Value": "Dashboard",
+                "instance": "",
+                "state": ""
+              }
+            }
+          }
+        ],
+        "type": "table"
+      },
+      {
+        "datasource": "${datasource}",
+        "description": "Whether the cluster state has converged.\n\nIt is normal for the cluster state to be diverged briefly as gossip events propagate. It is not normal for the cluster state to be diverged for a long period of time.\n\nThis will show one of the following:\n\n* Converged: Nodes are aware of all other nodes, with the correct states.\n* Not converged: A subset of nodes aren't aware of their peers, or don't have an updated view of peer states.\n",
+        "fieldConfig": {
+          "defaults": {
+            "mappings": [
+              {
+                "options": {
+                  "1": {
+                    "color": "red",
+                    "index": 1,
+                    "text": "Not converged"
+                  }
+                },
+                "type": "value"
+              },
+              {
+                "options": {
+                  "match": "null",
+                  "result": {
+                    "color": "green",
+                    "index": 0,
+                    "text": "Converged"
+                  }
+                },
+                "type": "special"
+              }
+            ],
+            "thresholds": {
+              "mode": "absolute",
+              "steps": [
+                {
+                  "color": "green",
+                  "value": null
+                },
+                {
+                  "color": "red",
+                  "value": 80
+                }
+              ]
+            },
+            "unit": "suffix:nodes"
+          },
+          "overrides": []
+        },
+        "gridPos": {
+          "h": 9,
+          "w": 8,
+          "x": 0,
+          "y": 9
+        },
+        "id": 3,
+        "options": {
+          "colorMode": "background",
+          "graphMode": "none",
+          "justifyMode": "auto",
+          "orientation": "auto",
+          "reduceOptions": {
+            "calcs": [
+              "lastNotNull"
+            ],
+            "fields": "",
+            "values": false
+          },
+          "showPercentChange": false,
+          "textMode": "auto",
+          "wideLayout": true
+        },
+        "pluginVersion": "10.4.2",
+        "targets": [
+          {
+            "datasource": "${datasource}",
+            "expr": "clamp((\n  sum(stddev by (state) (cluster_node_peers{cluster=\"$cluster\", namespace=\"$namespace\"}) != 0) or\n  (sum(abs(sum without (state) (cluster_node_peers{cluster=\"$cluster\", namespace=\"$namespace\"})) - scalar(count(cluster_node_info{cluster=\"$cluster\", namespace=\"$namespace\"})) != 0))\n  ),\n  1, 1\n)\n",
+            "format": "time_series",
+            "instant": true,
+            "legendFormat": "__auto",
+            "range": false,
+            "refId": "A"
+          }
+        ],
+        "title": "Convergence state",
+        "type": "stat"
+      },
+      {
+        "datasource": "${datasource}",
+        "fieldConfig": {
+          "defaults": {
+            "color": {
+              "mode": "continuous-GrYlRd"
+            },
+            "custom": {
+              "fillOpacity": 80,
+              "hideFrom": {
+                "legend": false,
+                "tooltip": false,
+                "viz": false
+              },
+              "insertNulls": false,
+              "lineWidth": 0,
+              "spanNulls": true
+            },
+            "mappings": [
+              {
+                "options": {
+                  "0": {
+                    "color": "green",
+                    "text": "Yes"
+                  }
+                },
+                "type": "value"
+              },
+              {
+                "options": {
+                  "1": {
+                    "color": "red",
+                    "text": "No"
+                  }
+                },
+                "type": "value"
+              }
+            ],
+            "max": 1,
+            "noValue": 0,
+            "thresholds": {
+              "mode": "absolute",
+              "steps": [
+                {
+                  "color": "green",
+                  "value": null
+                },
+                {
+                  "color": "red",
+                  "value": 80
+                }
+              ]
+            }
+          },
+          "overrides": []
+        },
+        "gridPos": {
+          "h": 9,
+          "w": 16,
+          "x": 8,
+          "y": 9
+        },
+        "id": 4,
+        "options": {
+          "alignValue": "left",
+          "legend": {
+            "displayMode": "list",
+            "placement": "bottom",
+            "showLegend": true
+          },
+          "mergeValues": true,
+          "rowHeight": 0.9,
+          "showValue": "auto",
+          "tooltip": {
+            "mode": "single",
+            "sort": "none"
+          }
+        },
+        "targets": [
+          {
+            "datasource": "${datasource}",
+            "expr": "ceil(clamp((\n  sum(stddev by (state) (cluster_node_peers{cluster=\"$cluster\", namespace=\"$namespace\"})) or\n  (sum(abs(sum without (state) (cluster_node_peers{cluster=\"$cluster\", namespace=\"$namespace\"})) - scalar(count(cluster_node_info{cluster=\"$cluster\", namespace=\"$namespace\"}))))\n  ),\n  0, 1\n))\n",
+            "instant": false,
+            "legendFormat": "Converged",
+            "range": true,
+            "refId": "A"
+          }
+        ],
+        "title": "Convergence state timeline",
+        "type": "state-timeline"
+      }
+    ],
+    "refresh": "10s",
+    "schemaVersion": 39,
+    "tags": [
+      "alloy-mixin"
+    ],
+    "templating": {
+      "list": [
+        {
+          "current": {
+            "selected": false,
+            "text": "Mimir",
+            "value": "mimir_ds"
+          },
+          "hide": 0,
+          "includeAll": false,
+          "label": "Data Source",
+          "multi": false,
+          "name": "datasource",
+          "options": [],
+          "query": "prometheus",
+          "refresh": 1,
+          "regex": "",
+          "skipUrlSync": false,
+          "sort": 2,
+          "type": "datasource"
+        },
+        {
+          "current": {
+            "selected": false,
+            "text": "Loki",
+            "value": "loki_ds"
+          },
+          "hide": 0,
+          "includeAll": false,
+          "label": "Loki Data Source",
+          "multi": false,
+          "name": "loki_datasource",
+          "options": [],
+          "query": "loki",
+          "refresh": 1,
+          "regex": "",
+          "skipUrlSync": false,
+          "sort": 2,
+          "type": "datasource"
+        },
+        {
+          "current": {
+            "isNone": true,
+            "selected": false,
+            "text": "None",
+            "value": ""
+          },
+          "datasource": {
+            "uid": "${datasource}"
+          },
+          "definition": "",
+          "hide": 0,
+          "includeAll": false,
+          "label": "cluster",
+          "multi": false,
+          "name": "cluster",
+          "options": [],
+          "query": {
+            "query": "label_values(alloy_component_controller_running_components, cluster)\n",
+            "refId": "cluster"
+          },
+          "refresh": 2,
+          "regex": "",
+          "skipUrlSync": false,
+          "sort": 2,
+          "type": "query"
+        },
+        {
+          "current": {
+            "isNone": true,
+            "selected": false,
+            "text": "None",
+            "value": ""
+          },
+          "datasource": {
+            "uid": "${datasource}"
+          },
+          "definition": "",
+          "hide": 0,
+          "includeAll": false,
+          "label": "namespace",
+          "multi": false,
+          "name": "namespace",
+          "options": [],
+          "query": {
+            "query": "label_values(alloy_component_controller_running_components{cluster=\"$cluster\"}, namespace)\n",
+            "refId": "namespace"
+          },
+          "refresh": 2,
+          "regex": "",
+          "skipUrlSync": false,
+          "sort": 2,
+          "type": "query"
+        }
+      ]
+    },
+    "time": {
+      "from": "now-1h",
+      "to": "now"
+    },
+    "timepicker": {
+      "refresh_intervals": [
+        "5s",
+        "10s",
+        "30s",
+        "1m",
+        "5m",
+        "15m",
+        "30m",
+        "1h",
+        "2h",
+        "1d"
+      ],
+      "time_options": [
+        "5m",
+        "15m",
+        "1h",
+        "6h",
+        "12h",
+        "24h",
+        "2d",
+        "7d",
+        "30d",
+        "90d"
+      ]
+    },
+    "timezone": "",
+    "title": "Alloy / Cluster Overview",
+    "uid": "",
+    "version": 0,
+    "weekStart": ""
+  }
--- a/charts/meta-monitoring/src/dashboards/alloy-controller.json
+++ b/charts/meta-monitoring/src/dashboards/alloy-controller.json
@@ -0,0 +1,970 @@
+{
+    "annotations": {
+      "list": [
+        {
+          "builtIn": 1,
+          "datasource": {
+            "type": "grafana",
+            "uid": "-- Grafana --"
+          },
+          "enable": true,
+          "hide": true,
+          "iconColor": "rgba(0, 211, 255, 1)",
+          "name": "Annotations & Alerts",
+          "type": "dashboard"
+        },
+        {
+          "datasource": "$loki_datasource",
+          "enable": true,
+          "expr": "{cluster=\"$cluster\", container=\"kube-diff-logger\"} | json | namespace_extracted=\"alloy\" | name_extracted=~\"alloy.*\"",
+          "iconColor": "rgba(0, 211, 255, 1)",
+          "instant": false,
+          "name": "Deployments",
+          "titleFormat": "{{cluster}}/{{namespace}}"
+        }
+      ]
+    },
+    "editable": true,
+    "fiscalYearStartMonth": 0,
+    "graphTooltip": 1,
+    "id": 28,
+    "links": [
+      {
+        "icon": "doc",
+        "targetBlank": true,
+        "title": "Documentation",
+        "tooltip": "Component controller documentation",
+        "type": "link",
+        "url": "https://grafana.com/docs/alloy/latest/concepts/component_controller/"
+      },
+      {
+        "asDropdown": true,
+        "icon": "external link",
+        "includeVars": true,
+        "keepTime": true,
+        "tags": [
+          "alloy-mixin"
+        ],
+        "targetBlank": false,
+        "title": "Dashboards",
+        "type": "dashboards"
+      }
+    ],
+    "panels": [
+      {
+        "datasource": "${datasource}",
+        "description": "The number of Alloy instances whose metrics are being sent and reported.\n",
+        "fieldConfig": {
+          "defaults": {
+            "mappings": [],
+            "thresholds": {
+              "mode": "absolute",
+              "steps": [
+                {
+                  "color": "green",
+                  "value": null
+                },
+                {
+                  "color": "red",
+                  "value": 80
+                }
+              ]
+            },
+            "unit": "instances"
+          },
+          "overrides": []
+        },
+        "gridPos": {
+          "h": 4,
+          "w": 10,
+          "x": 0,
+          "y": 0
+        },
+        "id": 1,
+        "options": {
+          "colorMode": "none",
+          "graphMode": "none",
+          "justifyMode": "auto",
+          "orientation": "auto",
+          "reduceOptions": {
+            "calcs": [
+              "lastNotNull"
+            ],
+            "fields": "",
+            "values": false
+          },
+          "showPercentChange": false,
+          "textMode": "auto",
+          "wideLayout": true
+        },
+        "pluginVersion": "10.4.2",
+        "targets": [
+          {
+            "datasource": "${datasource}",
+            "expr": "count(alloy_component_controller_evaluating{cluster=\"$cluster\", namespace=\"$namespace\"})",
+            "instant": false,
+            "legendFormat": "__auto",
+            "range": true,
+            "refId": "A"
+          }
+        ],
+        "title": "Running instances",
+        "type": "stat"
+      },
+      {
+        "datasource": "${datasource}",
+        "description": "Breakdown of components by health across all running instances.\n\n* Healthy: components have been evaluated completely and are reporting themselves as healthy.\n* Unhealthy: Components either could not be evaluated or are reporting themselves as unhealthy.\n* Unknown: A component has been created but has not yet been started.\n* Exited: A component has exited. It will not return to the running state.\n\nMore information on a component's health state can be retrieved using\nthe Alloy UI.\n\nNote that components may be in a degraded state even if they report\nthemselves as healthy. Use component-specific dashboards and alerts\nto observe detailed information about the behavior of a component.\n",
+        "fieldConfig": {
+          "defaults": {
+            "mappings": [],
+            "min": 0,
+            "thresholds": {
+              "mode": "absolute",
+              "steps": [
+                {
+                  "color": "green",
+                  "value": null
+                }
+              ]
+            }
+          },
+          "overrides": [
+            {
+              "matcher": {
+                "id": "byName",
+                "options": "Unhealthy"
+              },
+              "properties": [
+                {
+                  "id": "thresholds",
+                  "value": {
+                    "mode": "absolute",
+                    "steps": [
+                      {
+                        "color": "green",
+                        "value": null
+                      },
+                      {
+                        "color": "red",
+                        "value": 1
+                      }
+                    ]
+                  }
+                }
+              ]
+            },
+            {
+              "matcher": {
+                "id": "byName",
+                "options": "Unknown"
+              },
+              "properties": [
+                {
+                  "id": "thresholds",
+                  "value": {
+                    "mode": "absolute",
+                    "steps": [
+                      {
+                        "color": "green",
+                        "value": null
+                      },
+                      {
+                        "color": "blue",
+                        "value": 1
+                      }
+                    ]
+                  }
+                }
+              ]
+            },
+            {
+              "matcher": {
+                "id": "byName",
+                "options": "Exited"
+              },
+              "properties": [
+                {
+                  "id": "thresholds",
+                  "value": {
+                    "mode": "absolute",
+                    "steps": [
+                      {
+                        "color": "green",
+                        "value": null
+                      },
+                      {
+                        "color": "orange",
+                        "value": 1
+                      }
+                    ]
+                  }
+                }
+              ]
+            }
+          ]
+        },
+        "gridPos": {
+          "h": 12,
+          "w": 14,
+          "x": 10,
+          "y": 0
+        },
+        "id": 4,
+        "options": {
+          "displayMode": "gradient",
+          "maxVizHeight": 300,
+          "minVizHeight": 16,
+          "minVizWidth": 8,
+          "namePlacement": "auto",
+          "orientation": "vertical",
+          "reduceOptions": {
+            "calcs": [
+              "lastNotNull"
+            ],
+            "fields": "",
+            "values": false
+          },
+          "showUnfilled": true,
+          "sizing": "auto",
+          "valueMode": "color"
+        },
+        "pluginVersion": "10.4.2",
+        "targets": [
+          {
+            "datasource": "${datasource}",
+            "expr": "sum(alloy_component_controller_running_components{cluster=\"$cluster\", namespace=\"$namespace\", health_type=\"healthy\"}) or vector(0)",
+            "instant": true,
+            "legendFormat": "Healthy",
+            "range": false,
+            "refId": "A"
+          },
+          {
+            "datasource": "${datasource}",
+            "expr": "sum(alloy_component_controller_running_components{cluster=\"$cluster\", namespace=\"$namespace\", health_type=\"unhealthy\"}) or vector(0)",
+            "instant": true,
+            "legendFormat": "Unhealthy",
+            "range": false,
+            "refId": "B"
+          },
+          {
+            "datasource": "${datasource}",
+            "expr": "sum(alloy_component_controller_running_components{cluster=\"$cluster\", namespace=\"$namespace\", health_type=\"unknown\"}) or vector(0)",
+            "instant": true,
+            "legendFormat": "Unknown",
+            "range": false,
+            "refId": "C"
+          },
+          {
+            "datasource": "${datasource}",
+            "expr": "sum(alloy_component_controller_running_components{cluster=\"$cluster\", namespace=\"$namespace\", health_type=\"exited\"}) or vector(0)",
+            "instant": true,
+            "legendFormat": "Exited",
+            "range": false,
+            "refId": "D"
+          }
+        ],
+        "title": "Components by health",
+        "type": "bargauge"
+      },
+      {
+        "datasource": "${datasource}",
+        "description": "The number of running components across all running instances.\n",
+        "fieldConfig": {
+          "defaults": {
+            "mappings": [],
+            "thresholds": {
+              "mode": "absolute",
+              "steps": [
+                {
+                  "color": "green",
+                  "value": null
+                },
+                {
+                  "color": "red",
+                  "value": 80
+                }
+              ]
+            },
+            "unit": "components"
+          },
+          "overrides": []
+        },
+        "gridPos": {
+          "h": 4,
+          "w": 10,
+          "x": 0,
+          "y": 4
+        },
+        "id": 2,
+        "options": {
+          "colorMode": "none",
+          "graphMode": "none",
+          "justifyMode": "auto",
+          "orientation": "auto",
+          "reduceOptions": {
+            "calcs": [
+              "lastNotNull"
+            ],
+            "fields": "",
+            "values": false
+          },
+          "showPercentChange": false,
+          "textMode": "auto",
+          "wideLayout": true
+        },
+        "pluginVersion": "10.4.2",
+        "targets": [
+          {
+            "datasource": "${datasource}",
+            "expr": "sum(alloy_component_controller_running_components{cluster=\"$cluster\", namespace=\"$namespace\"})",
+            "instant": false,
+            "legendFormat": "__auto",
+            "range": true,
+            "refId": "A"
+          }
+        ],
+        "title": "Running components",
+        "type": "stat"
+      },
+      {
+        "datasource": "${datasource}",
+        "description": "The percentage of components which are in a healthy state.\n",
+        "fieldConfig": {
+          "defaults": {
+            "mappings": [],
+            "max": 1,
+            "min": 0,
+            "noValue": "No components",
+            "thresholds": {
+              "mode": "absolute",
+              "steps": [
+                {
+                  "color": "green",
+                  "value": null
+                },
+                {
+                  "color": "red",
+                  "value": 80
+                }
+              ]
+            },
+            "unit": "percentunit"
+          },
+          "overrides": []
+        },
+        "gridPos": {
+          "h": 4,
+          "w": 10,
+          "x": 0,
+          "y": 8
+        },
+        "id": 3,
+        "options": {
+          "colorMode": "value",
+          "graphMode": "area",
+          "justifyMode": "auto",
+          "orientation": "auto",
+          "reduceOptions": {
+            "calcs": [
+              "lastNotNull"
+            ],
+            "fields": "",
+            "values": false
+          },
+          "showPercentChange": false,
+          "text": {
+            "valueSize": 80
+          },
+          "textMode": "auto",
+          "wideLayout": true
+        },
+        "pluginVersion": "10.4.2",
+        "targets": [
+          {
+            "datasource": "${datasource}",
+            "expr": "sum(alloy_component_controller_running_components{cluster=\"$cluster\", namespace=\"$namespace\",health_type=\"healthy\"}) /\nsum(alloy_component_controller_running_components{cluster=\"$cluster\", namespace=\"$namespace\"})\n",
+            "instant": false,
+            "legendFormat": "__auto",
+            "range": true,
+            "refId": "A"
+          }
+        ],
+        "title": "Overall component health",
+        "type": "stat"
+      },
+      {
+        "datasource": "${datasource}",
+        "description": "The frequency at which components get updated.\n",
+        "fieldConfig": {
+          "defaults": {
+            "color": {
+              "mode": "palette-classic"
+            },
+            "custom": {
+              "axisBorderShow": false,
+              "axisCenteredZero": false,
+              "axisColorMode": "text",
+              "axisLabel": "",
+              "axisPlacement": "auto",
+              "barAlignment": 0,
+              "drawStyle": "points",
+              "fillOpacity": 0,
+              "gradientMode": "none",
+              "hideFrom": {
+                "legend": false,
+                "tooltip": false,
+                "viz": false
+              },
+              "insertNulls": false,
+              "lineInterpolation": "linear",
+              "lineWidth": 1,
+              "pointSize": 3,
+              "scaleDistribution": {
+                "type": "linear"
+              },
+              "showPoints": "auto",
+              "spanNulls": false,
+              "stacking": {
+                "group": "A",
+                "mode": "none"
+              },
+              "thresholdsStyle": {
+                "mode": "off"
+              }
+            },
+            "mappings": [],
+            "thresholds": {
+              "mode": "absolute",
+              "steps": [
+                {
+                  "color": "green",
+                  "value": null
+                },
+                {
+                  "color": "red",
+                  "value": 80
+                }
+              ]
+            },
+            "unit": "ops"
+          },
+          "overrides": []
+        },
+        "gridPos": {
+          "h": 10,
+          "w": 8,
+          "x": 0,
+          "y": 12
+        },
+        "id": 5,
+        "options": {
+          "legend": {
+            "calcs": [],
+            "displayMode": "list",
+            "placement": "bottom",
+            "showLegend": true
+          },
+          "tooltip": {
+            "mode": "multi",
+            "sort": "none"
+          }
+        },
+        "targets": [
+          {
+            "datasource": "${datasource}",
+            "expr": "sum by (instance) (rate(alloy_component_evaluation_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))",
+            "instant": false,
+            "legendFormat": "__auto",
+            "range": true,
+            "refId": "A"
+          }
+        ],
+        "title": "Component evaluation rate",
+        "type": "timeseries"
+      },
+      {
+        "datasource": "${datasource}",
+        "description": "The percentiles for how long it takes to complete component evaluations.\n\nComponent evaluations must complete for components to have the latest\narguments. The longer the evaluations take, the slower it will be to\nreconcile the state of components.\n\nIf evaluation is taking too long, consider sharding your components to\ndeal with smaller amounts of data and reuse data as much as possible.\n",
+        "fieldConfig": {
+          "defaults": {
+            "color": {
+              "mode": "palette-classic"
+            },
+            "custom": {
+              "axisBorderShow": false,
+              "axisCenteredZero": false,
+              "axisColorMode": "text",
+              "axisLabel": "",
+              "axisPlacement": "auto",
+              "barAlignment": 0,
+              "drawStyle": "line",
+              "fillOpacity": 0,
+              "gradientMode": "none",
+              "hideFrom": {
+                "legend": false,
+                "tooltip": false,
+                "viz": false
+              },
+              "insertNulls": false,
+              "lineInterpolation": "linear",
+              "lineWidth": 1,
+              "pointSize": 5,
+              "scaleDistribution": {
+                "type": "linear"
+              },
+              "showPoints": "auto",
+              "spanNulls": false,
+              "stacking": {
+                "group": "A",
+                "mode": "none"
+              },
+              "thresholdsStyle": {
+                "mode": "off"
+              }
+            },
+            "mappings": [],
+            "thresholds": {
+              "mode": "absolute",
+              "steps": [
+                {
+                  "color": "green",
+                  "value": null
+                },
+                {
+                  "color": "red",
+                  "value": 80
+                }
+              ]
+            },
+            "unit": "s"
+          },
+          "overrides": []
+        },
+        "gridPos": {
+          "h": 10,
+          "w": 8,
+          "x": 8,
+          "y": 12
+        },
+        "id": 6,
+        "options": {
+          "legend": {
+            "calcs": [],
+            "displayMode": "list",
+            "placement": "bottom",
+            "showLegend": true
+          },
+          "tooltip": {
+            "mode": "single",
+            "sort": "none"
+          }
+        },
+        "targets": [
+          {
+            "datasource": "${datasource}",
+            "expr": "histogram_quantile(0.99, sum(rate(alloy_component_evaluation_seconds{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval])))\nor\nhistogram_quantile(0.99, sum by (le) (rate(alloy_component_evaluation_seconds_bucket{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval])))\n",
+            "instant": false,
+            "legendFormat": "99th percentile",
+            "range": true,
+            "refId": "A"
+          },
+          {
+            "datasource": "${datasource}",
+            "expr": "histogram_quantile(0.50, sum(rate(alloy_component_evaluation_seconds{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval])))\nor\nhistogram_quantile(0.50, sum by (le) (rate(alloy_component_evaluation_seconds_bucket{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval])))\n",
+            "instant": false,
+            "legendFormat": "50th percentile",
+            "range": true,
+            "refId": "B"
+          },
+          {
+            "datasource": "${datasource}",
+            "expr": "(\n  histogram_sum(sum(rate(alloy_component_evaluation_seconds{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval]))) /\n  histogram_count(sum(rate(alloy_component_evaluation_seconds{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval])))\n)\nor\n(\n  sum(rate(alloy_component_evaluation_seconds_sum{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval])) /\n  sum(rate(alloy_component_evaluation_seconds_count{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval]))\n)\n",
+            "instant": false,
+            "legendFormat": "Average",
+            "range": true,
+            "refId": "C"
+          }
+        ],
+        "title": "Component evaluation time",
+        "type": "timeseries"
+      },
+      {
+        "datasource": "${datasource}",
+        "description": "The percentage of time spent evaluating 'slow' components - components that took longer than 1 minute to evaluate.\n\nIdeally, no component should take more than 1 minute to evaluate. The components displayed in this chart\nmay be a sign of a problem with the pipeline.\n",
+        "fieldConfig": {
+          "defaults": {
+            "color": {
+              "mode": "palette-classic"
+            },
+            "custom": {
+              "axisBorderShow": false,
+              "axisCenteredZero": false,
+              "axisColorMode": "text",
+              "axisLabel": "",
+              "axisPlacement": "auto",
+              "barAlignment": 0,
+              "drawStyle": "line",
+              "fillOpacity": 0,
+              "gradientMode": "none",
+              "hideFrom": {
+                "legend": false,
+                "tooltip": false,
+                "viz": false
+              },
+              "insertNulls": false,
+              "lineInterpolation": "linear",
+              "lineWidth": 1,
+              "pointSize": 5,
+              "scaleDistribution": {
+                "type": "linear"
+              },
+              "showPoints": "auto",
+              "spanNulls": false,
+              "stacking": {
+                "group": "A",
+                "mode": "none"
+              },
+              "thresholdsStyle": {
+                "mode": "off"
+              }
+            },
+            "mappings": [],
+            "thresholds": {
+              "mode": "absolute",
+              "steps": [
+                {
+                  "color": "green",
+                  "value": null
+                },
+                {
+                  "color": "red",
+                  "value": 80
+                }
+              ]
+            },
+            "unit": "percentunit"
+          },
+          "overrides": []
+        },
+        "gridPos": {
+          "h": 10,
+          "w": 8,
+          "x": 16,
+          "y": 12
+        },
+        "id": 7,
+        "options": {
+          "legend": {
+            "calcs": [],
+            "displayMode": "list",
+            "placement": "bottom",
+            "showLegend": true
+          },
+          "tooltip": {
+            "mode": "single",
+            "sort": "none"
+          }
+        },
+        "targets": [
+          {
+            "datasource": "${datasource}",
+            "expr": "sum by (component_path, component_id) (rate(alloy_component_evaluation_slow_seconds{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))\n/ scalar(sum(rate(alloy_component_evaluation_seconds_sum{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])))\n",
+            "instant": false,
+            "legendFormat": "{{component_path}} {{component_id}}",
+            "range": true,
+            "refId": "A"
+          }
+        ],
+        "title": "Slow components evaluation times",
+        "type": "timeseries"
+      },
+      {
+        "datasource": "${datasource}",
+        "description": "Detailed histogram view of how long component evaluations take.\n\nThe goal is to design your config so that evaluations take as little\ntime as possible; under 100ms is a good goal.\n",
+        "fieldConfig": {
+          "defaults": {
+            "custom": {
+              "hideFrom": {
+                "legend": false,
+                "tooltip": false,
+                "viz": false
+              },
+              "scaleDistribution": {
+                "type": "linear"
+              }
+            }
+          },
+          "overrides": []
+        },
+        "gridPos": {
+          "h": 10,
+          "w": 8,
+          "x": 0,
+          "y": 22
+        },
+        "id": 8,
+        "maxDataPoints": 30,
+        "options": {
+          "calculate": false,
+          "cellGap": 0,
+          "color": {
+            "exponent": 0.5,
+            "fill": "dark-orange",
+            "mode": "scheme",
+            "reverse": false,
+            "scale": "exponential",
+            "scheme": "Spectral",
+            "steps": 64
+          },
+          "exemplars": {
+            "color": "rgba(255,0,255,0.7)"
+          },
+          "filterValues": {
+            "le": 0.1
+          },
+          "legend": {
+            "show": true
+          },
+          "rowsFrame": {
+            "layout": "auto"
+          },
+          "tooltip": {
+            "mode": "single",
+            "showColorScale": false,
+            "yHistogram": true
+          },
+          "yAxis": {
+            "axisPlacement": "left",
+            "reverse": false,
+            "unit": "s"
+          }
+        },
+        "pluginVersion": "10.4.2",
+        "targets": [
+          {
+            "datasource": "${datasource}",
+            "expr": "sum(increase(alloy_component_evaluation_seconds{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))\nor ignoring (le)\nsum by (le) (increase(alloy_component_evaluation_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))\n",
+            "format": "heatmap",
+            "instant": false,
+            "legendFormat": "{{le}}",
+            "range": true,
+            "refId": "A"
+          }
+        ],
+        "title": "Component evaluation histogram",
+        "type": "heatmap"
+      },
+      {
+        "datasource": "${datasource}",
+        "description": "Detailed histogram of how long components wait to be evaluated after their dependency is updated.\n\nThe goal is to design your config so that most of the time components do not\nqueue for long; under 10ms is a good goal.\n",
+        "fieldConfig": {
+          "defaults": {
+            "custom": {
+              "hideFrom": {
+                "legend": false,
+                "tooltip": false,
+                "viz": false
+              },
+              "scaleDistribution": {
+                "type": "linear"
+              }
+            }
+          },
+          "overrides": []
+        },
+        "gridPos": {
+          "h": 10,
+          "w": 8,
+          "x": 8,
+          "y": 22
+        },
+        "id": 9,
+        "maxDataPoints": 30,
+        "options": {
+          "calculate": false,
+          "cellGap": 0,
+          "color": {
+            "exponent": 0.5,
+            "fill": "dark-orange",
+            "mode": "scheme",
+            "reverse": false,
+            "scale": "exponential",
+            "scheme": "Spectral",
+            "steps": 64
+          },
+          "exemplars": {
+            "color": "rgba(255,0,255,0.7)"
+          },
+          "filterValues": {
+            "le": 0.1
+          },
+          "legend": {
+            "show": true
+          },
+          "rowsFrame": {
+            "layout": "auto"
+          },
+          "tooltip": {
+            "mode": "single",
+            "showColorScale": false,
+            "yHistogram": true
+          },
+          "yAxis": {
+            "axisPlacement": "left",
+            "reverse": false,
+            "unit": "s"
+          }
+        },
+        "pluginVersion": "10.4.2",
+        "targets": [
+          {
+            "datasource": "${datasource}",
+            "expr": "sum(increase(alloy_component_dependencies_wait_seconds{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))\nor ignoring (le)\nsum by (le) (increase(alloy_component_dependencies_wait_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))\n",
+            "format": "heatmap",
+            "instant": false,
+            "legendFormat": "{{le}}",
+            "range": true,
+            "refId": "A"
+          }
+        ],
+        "title": "Component dependency wait histogram",
+        "type": "heatmap"
+      }
+    ],
+    "refresh": "10s",
+    "schemaVersion": 39,
+    "tags": [
+      "alloy-mixin"
+    ],
+    "templating": {
+      "list": [
+        {
+          "current": {
+            "selected": false,
+            "text": "Mimir",
+            "value": "mimir_ds"
+          },
+          "hide": 0,
+          "includeAll": false,
+          "label": "Data Source",
+          "multi": false,
+          "name": "datasource",
+          "options": [],
+          "query": "prometheus",
+          "refresh": 1,
+          "regex": "",
+          "skipUrlSync": false,
+          "sort": 2,
+          "type": "datasource"
+        },
+        {
+          "current": {
+            "selected": false,
+            "text": "Loki",
+            "value": "loki_ds"
+          },
+          "hide": 0,
+          "includeAll": false,
+          "label": "Loki Data Source",
+          "multi": false,
+          "name": "loki_datasource",
+          "options": [],
+          "query": "loki",
+          "refresh": 1,
+          "regex": "",
+          "skipUrlSync": false,
+          "sort": 2,
+          "type": "datasource"
+        },
+        {
+          "current": {
+            "isNone": true,
+            "selected": false,
+            "text": "None",
+            "value": ""
+          },
+          "datasource": {
+            "uid": "${datasource}"
+          },
+          "definition": "",
+          "hide": 0,
+          "includeAll": false,
+          "label": "cluster",
+          "multi": false,
+          "name": "cluster",
+          "options": [],
+          "query": {
+            "query": "label_values(alloy_component_controller_running_components, cluster)\n",
+            "refId": "cluster"
+          },
+          "refresh": 2,
+          "regex": "",
+          "skipUrlSync": false,
+          "sort": 2,
+          "type": "query"
+        },
+        {
+          "current": {
+            "isNone": true,
+            "selected": false,
+            "text": "None",
+            "value": ""
+          },
+          "datasource": {
+            "uid": "${datasource}"
+          },
+          "definition": "",
+          "hide": 0,
+          "includeAll": false,
+          "label": "namespace",
+          "multi": false,
+          "name": "namespace",
+          "options": [],
+          "query": {
+            "query": "label_values(alloy_component_controller_running_components{cluster=\"$cluster\"}, namespace)\n",
+            "refId": "namespace"
+          },
+          "refresh": 2,
+          "regex": "",
+          "skipUrlSync": false,
+          "sort": 2,
+          "type": "query"
+        }
+      ]
+    },
+    "time": {
+      "from": "now-1h",
+      "to": "now"
+    },
+    "timepicker": {
+      "refresh_intervals": [
+        "5s",
+        "10s",
+        "30s",
+        "1m",
+        "5m",
+        "15m",
+        "30m",
+        "1h",
+        "2h",
+        "1d"
+      ],
+      "time_options": [
+        "5m",
+        "15m",
+        "1h",
+        "6h",
+        "12h",
+        "24h",
+        "2d",
+        "7d",
+        "30d",
+        "90d"
+      ]
+    },
+    "timezone": "",
+    "title": "Alloy / Controller",
+    "uid": "bf9f456aad7108b2c808dbd9973e386f",
+    "version": 0,
+    "weekStart": ""
+  }
--- a/charts/meta-monitoring/src/dashboards/alloy-opentelemetry.json
+++ b/charts/meta-monitoring/src/dashboards/alloy-opentelemetry.json
@@ -0,0 +1,923 @@
+{
+    "annotations": {
+      "list": [
+        {
+          "builtIn": 1,
+          "datasource": {
+            "type": "grafana",
+            "uid": "-- Grafana --"
+          },
+          "enable": true,
+          "hide": true,
+          "iconColor": "rgba(0, 211, 255, 1)",
+          "name": "Annotations & Alerts",
+          "type": "dashboard"
+        }
+      ]
+    },
+    "editable": true,
+    "fiscalYearStartMonth": 0,
+    "graphTooltip": 1,
+    "id": 25,
+    "links": [
+      {
+        "asDropdown": true,
+        "icon": "external link",
+        "includeVars": true,
+        "keepTime": true,
+        "tags": [
+          "alloy-mixin"
+        ],
+        "targetBlank": false,
+        "title": "Dashboards",
+        "type": "dashboards"
+      }
+    ],
+    "panels": [
+      {
+        "datasource": "${datasource}",
+        "gridPos": {
+          "h": 1,
+          "w": 24,
+          "x": 0,
+          "y": 0
+        },
+        "id": 1,
+        "title": "Receivers for traces [otelcol.receiver]",
+        "type": "row"
+      },
+      {
+        "datasource": "${datasource}",
+        "description": "Number of spans successfully pushed into the pipeline.\n",
+        "fieldConfig": {
+          "defaults": {
+            "color": {
+              "mode": "palette-classic"
+            },
+            "custom": {
+              "axisBorderShow": false,
+              "axisCenteredZero": false,
+              "axisColorMode": "text",
+              "axisLabel": "",
+              "axisPlacement": "auto",
+              "barAlignment": 0,
+              "drawStyle": "line",
+              "fillOpacity": 20,
+              "gradientMode": "hue",
+              "hideFrom": {
+                "legend": false,
+                "tooltip": false,
+                "viz": false
+              },
+              "insertNulls": false,
+              "lineInterpolation": "linear",
+              "lineWidth": 1,
+              "pointSize": 5,
+              "scaleDistribution": {
+                "type": "linear"
+              },
+              "showPoints": "auto",
+              "spanNulls": false,
+              "stacking": {
+                "group": "A",
+                "mode": "normal"
+              },
+              "thresholdsStyle": {
+                "mode": "off"
+              }
+            },
+            "mappings": [],
+            "thresholds": {
+              "mode": "absolute",
+              "steps": [
+                {
+                  "color": "green",
+                  "value": null
+                },
+                {
+                  "color": "red",
+                  "value": 80
+                }
+              ]
+            }
+          },
+          "overrides": []
+        },
+        "gridPos": {
+          "h": 10,
+          "w": 8,
+          "x": 0,
+          "y": 1
+        },
+        "id": 2,
+        "options": {
+          "legend": {
+            "calcs": [],
+            "displayMode": "list",
+            "placement": "bottom",
+            "showLegend": true
+          },
+          "tooltip": {
+            "mode": "single",
+            "sort": "none"
+          }
+        },
+        "targets": [
+          {
+            "datasource": "${datasource}",
+            "expr": "rate(receiver_accepted_spans_ratio_total{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\"}[$__rate_interval])\n",
+            "instant": false,
+            "legendFormat": "{{ pod }} / {{ transport }}",
+            "range": true,
+            "refId": "A"
+          }
+        ],
+        "title": "Accepted spans",
+        "type": "timeseries"
+      },
+      {
+        "datasource": "${datasource}",
+        "description": "Number of spans that could not be pushed into the pipeline.\n",
+        "fieldConfig": {
+          "defaults": {
+            "color": {
+              "mode": "palette-classic"
+            },
+            "custom": {
+              "axisBorderShow": false,
+              "axisCenteredZero": false,
+              "axisColorMode": "text",
+              "axisLabel": "",
+              "axisPlacement": "auto",
+              "barAlignment": 0,
+              "drawStyle": "line",
+              "fillOpacity": 20,
+              "gradientMode": "hue",
+              "hideFrom": {
+                "legend": false,
+                "tooltip": false,
+                "viz": false
+              },
+              "insertNulls": false,
+              "lineInterpolation": "linear",
+              "lineWidth": 1,
+              "pointSize": 5,
+              "scaleDistribution": {
+                "type": "linear"
+              },
+              "showPoints": "auto",
+              "spanNulls": false,
+              "stacking": {
+                "group": "A",
+                "mode": "normal"
+              },
+              "thresholdsStyle": {
+                "mode": "off"
+              }
+            },
+            "mappings": [],
+            "thresholds": {
+              "mode": "absolute",
+              "steps": [
+                {
+                  "color": "green",
+                  "value": null
+                },
+                {
+                  "color": "red",
+                  "value": 80
+                }
+              ]
+            }
+          },
+          "overrides": []
+        },
+        "gridPos": {
+          "h": 10,
+          "w": 8,
+          "x": 8,
+          "y": 1
+        },
+        "id": 3,
+        "options": {
+          "legend": {
+            "calcs": [],
+            "displayMode": "list",
+            "placement": "bottom",
+            "showLegend": true
+          },
+          "tooltip": {
+            "mode": "single",
+            "sort": "none"
+          }
+        },
+        "targets": [
+          {
+            "datasource": "${datasource}",
+            "expr": "rate(receiver_refused_spans_ratio_total{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\"}[$__rate_interval])\n",
+            "instant": false,
+            "legendFormat": "{{ pod }} / {{ transport }}",
+            "range": true,
+            "refId": "A"
+          }
+        ],
+        "title": "Refused spans",
+        "type": "timeseries"
+      },
+      {
+        "datasource": "${datasource}",
+        "description": "The duration of inbound RPCs.\n",
+        "fieldConfig": {
+          "defaults": {
+            "custom": {
+              "hideFrom": {
+                "legend": false,
+                "tooltip": false,
+                "viz": false
+              },
+              "scaleDistribution": {
+                "type": "linear"
+              }
+            }
+          },
+          "overrides": []
+        },
+        "gridPos": {
+          "h": 10,
+          "w": 8,
+          "x": 16,
+          "y": 1
+        },
+        "id": 4,
+        "maxDataPoints": 30,
+        "options": {
+          "calculate": false,
+          "cellGap": 1,
+          "color": {
+            "exponent": 0.5,
+            "fill": "dark-orange",
+            "mode": "scheme",
+            "reverse": false,
+            "scale": "exponential",
+            "scheme": "Oranges",
+            "steps": 65
+          },
+          "exemplars": {
+            "color": "rgba(255,0,255,0.7)"
+          },
+          "filterValues": {
+            "le": 1e-9
+          },
+          "legend": {
+            "show": true
+          },
+          "rowsFrame": {
+            "layout": "auto"
+          },
+          "tooltip": {
+            "mode": "single",
+            "showColorScale": false,
+            "yHistogram": true
+          },
+          "yAxis": {
+            "axisPlacement": "left",
+            "reverse": false,
+            "unit": "ms"
+          }
+        },
+        "pluginVersion": "10.4.2",
+        "targets": [
+          {
+            "datasource": "${datasource}",
+            "expr": "sum by (le) (increase(rpc_server_duration_milliseconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\", rpc_service=\"opentelemetry.proto.collector.trace.v1.TraceService\"}[$__rate_interval]))",
+            "format": "heatmap",
+            "instant": false,
+            "legendFormat": "{{le}}",
+            "range": true,
+            "refId": "A"
+          }
+        ],
+        "title": "RPC server duration",
+        "type": "heatmap"
+      },
+      {
+        "datasource": "${datasource}",
+        "gridPos": {
+          "h": 1,
+          "w": 24,
+          "x": 0,
+          "y": 11
+        },
+        "id": 5,
+        "title": "Batching of logs, metrics, and traces [otelcol.processor.batch]",
+        "type": "row"
+      },
+      {
+        "datasource": "${datasource}",
+        "description": "Number of spans, metric datapoints, or log lines in a batch.\n",
+        "fieldConfig": {
+          "defaults": {
+            "custom": {
+              "hideFrom": {
+                "legend": false,
+                "tooltip": false,
+                "viz": false
+              },
+              "scaleDistribution": {
+                "type": "linear"
+              }
+            }
+          },
+          "overrides": []
+        },
+        "gridPos": {
+          "h": 10,
+          "w": 8,
+          "x": 0,
+          "y": 12
+        },
+        "id": 6,
+        "maxDataPoints": 30,
+        "options": {
+          "calculate": false,
+          "cellGap": 1,
+          "color": {
+            "exponent": 0.5,
+            "fill": "dark-orange",
+            "mode": "scheme",
+            "reverse": false,
+            "scale": "exponential",
+            "scheme": "Oranges",
+            "steps": 65
+          },
+          "exemplars": {
+            "color": "rgba(255,0,255,0.7)"
+          },
+          "filterValues": {
+            "le": 1e-9
+          },
+          "legend": {
+            "show": true
+          },
+          "rowsFrame": {
+            "layout": "auto"
+          },
+          "tooltip": {
+            "mode": "single",
+            "showColorScale": false,
+            "yHistogram": true
+          },
+          "yAxis": {
+            "axisPlacement": "left",
+            "reverse": false,
+            "unit": "short"
+          }
+        },
+        "pluginVersion": "10.4.2",
+        "targets": [
+          {
+            "datasource": "${datasource}",
+            "expr": "sum by (le) (increase(processor_batch_batch_send_size_ratio_bucket{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\"}[$__rate_interval]))",
+            "format": "heatmap",
+            "instant": false,
+            "legendFormat": "{{le}}",
+            "range": true,
+            "refId": "A"
+          }
+        ],
+        "title": "Number of units in the batch",
+        "type": "heatmap"
+      },
+      {
+        "datasource": "${datasource}",
+        "description": "Number of distinct metadata value combinations being processed.\n",
+        "fieldConfig": {
+          "defaults": {
+            "color": {
+              "mode": "palette-classic"
+            },
+            "custom": {
+              "axisBorderShow": false,
+              "axisCenteredZero": false,
+              "axisColorMode": "text",
+              "axisLabel": "",
+              "axisPlacement": "auto",
+              "barAlignment": 0,
+              "drawStyle": "line",
+              "fillOpacity": 0,
+              "gradientMode": "none",
+              "hideFrom": {
+                "legend": false,
+                "tooltip": false,
+                "viz": false
+              },
+              "insertNulls": false,
+              "lineInterpolation": "linear",
+              "lineWidth": 1,
+              "pointSize": 5,
+              "scaleDistribution": {
+                "type": "linear"
+              },
+              "showPoints": "auto",
+              "spanNulls": false,
+              "stacking": {
+                "group": "A",
+                "mode": "none"
+              },
+              "thresholdsStyle": {
+                "mode": "off"
+              }
+            },
+            "mappings": [],
+            "thresholds": {
+              "mode": "absolute",
+              "steps": [
+                {
+                  "color": "green",
+                  "value": null
+                },
+                {
+                  "color": "red",
+                  "value": 80
+                }
+              ]
+            }
+          },
+          "overrides": []
+        },
+        "gridPos": {
+          "h": 10,
+          "w": 8,
+          "x": 8,
+          "y": 12
+        },
+        "id": 7,
+        "options": {
+          "legend": {
+            "calcs": [],
+            "displayMode": "list",
+            "placement": "bottom",
+            "showLegend": true
+          },
+          "tooltip": {
+            "mode": "single",
+            "sort": "none"
+          }
+        },
+        "targets": [
+          {
+            "datasource": "${datasource}",
+            "expr": "processor_batch_metadata_cardinality_ratio{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\"}\n",
+            "instant": false,
+            "legendFormat": "{{ pod }}",
+            "range": true,
+            "refId": "A"
+          }
+        ],
+        "title": "Distinct metadata values",
+        "type": "timeseries"
+      },
+      {
+        "datasource": "${datasource}",
+        "description": "Number of times the batch was sent due to a timeout trigger.\n",
+        "fieldConfig": {
+          "defaults": {
+            "color": {
+              "mode": "palette-classic"
+            },
+            "custom": {
+              "axisBorderShow": false,
+              "axisCenteredZero": false,
+              "axisColorMode": "text",
+              "axisLabel": "",
+              "axisPlacement": "auto",
+              "barAlignment": 0,
+              "drawStyle": "line",
+              "fillOpacity": 0,
+              "gradientMode": "none",
+              "hideFrom": {
+                "legend": false,
+                "tooltip": false,
+                "viz": false
+              },
+              "insertNulls": false,
+              "lineInterpolation": "linear",
+              "lineWidth": 1,
+              "pointSize": 5,
+              "scaleDistribution": {
+                "type": "linear"
+              },
+              "showPoints": "auto",
+              "spanNulls": false,
+              "stacking": {
+                "group": "A",
+                "mode": "none"
+              },
+              "thresholdsStyle": {
+                "mode": "off"
+              }
+            },
+            "mappings": [],
+            "thresholds": {
+              "mode": "absolute",
+              "steps": [
+                {
+                  "color": "green",
+                  "value": null
+                },
+                {
+                  "color": "red",
+                  "value": 80
+                }
+              ]
+            }
+          },
+          "overrides": []
+        },
+        "gridPos": {
+          "h": 10,
+          "w": 8,
+          "x": 16,
+          "y": 12
+        },
+        "id": 8,
+        "options": {
+          "legend": {
+            "calcs": [],
+            "displayMode": "list",
+            "placement": "bottom",
+            "showLegend": true
+          },
+          "tooltip": {
+            "mode": "single",
+            "sort": "none"
+          }
+        },
+        "targets": [
+          {
+            "datasource": "${datasource}",
+            "expr": "rate(processor_batch_timeout_trigger_send_ratio_total{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\"}[$__rate_interval])\n",
+            "instant": false,
+            "legendFormat": "{{ pod }}",
+            "range": true,
+            "refId": "A"
+          }
+        ],
+        "title": "Timeout trigger",
+        "type": "timeseries"
+      },
+      {
+        "datasource": "${datasource}",
+        "gridPos": {
+          "h": 1,
+          "w": 24,
+          "x": 0,
+          "y": 22
+        },
+        "id": 9,
+        "title": "Exporters for traces [otelcol.exporter]",
+        "type": "row"
+      },
+      {
+        "datasource": "${datasource}",
+        "description": "Number of spans successfully sent to destination.\n",
+        "fieldConfig": {
+          "defaults": {
+            "color": {
+              "mode": "palette-classic"
+            },
+            "custom": {
+              "axisBorderShow": false,
+              "axisCenteredZero": false,
+              "axisColorMode": "text",
+              "axisLabel": "",
+              "axisPlacement": "auto",
+              "barAlignment": 0,
+              "drawStyle": "line",
+              "fillOpacity": 20,
+              "gradientMode": "hue",
+              "hideFrom": {
+                "legend": false,
+                "tooltip": false,
+                "viz": false
+              },
+              "insertNulls": false,
+              "lineInterpolation": "linear",
+              "lineWidth": 1,
+              "pointSize": 5,
+              "scaleDistribution": {
+                "type": "linear"
+              },
+              "showPoints": "auto",
+              "spanNulls": false,
+              "stacking": {
+                "group": "A",
+                "mode": "normal"
+              },
+              "thresholdsStyle": {
+                "mode": "off"
+              }
+            },
+            "mappings": [],
+            "thresholds": {
+              "mode": "absolute",
+              "steps": [
+                {
+                  "color": "green",
+                  "value": null
+                },
+                {
+                  "color": "red",
+                  "value": 80
+                }
+              ]
+            }
+          },
+          "overrides": []
+        },
+        "gridPos": {
+          "h": 10,
+          "w": 8,
+          "x": 0,
+          "y": 23
+        },
+        "id": 10,
+        "options": {
+          "legend": {
+            "calcs": [],
+            "displayMode": "list",
+            "placement": "bottom",
+            "showLegend": true
+          },
+          "tooltip": {
+            "mode": "single",
+            "sort": "none"
+          }
+        },
+        "targets": [
+          {
+            "datasource": "${datasource}",
+            "expr": "rate(exporter_sent_spans_ratio_total{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\"}[$__rate_interval])\n",
+            "instant": false,
+            "legendFormat": "{{ pod }}",
+            "range": true,
+            "refId": "A"
+          }
+        ],
+        "title": "Exported sent spans",
+        "type": "timeseries"
+      },
+      {
+        "datasource": "${datasource}",
+        "description": "Number of spans in failed attempts to send to destination.\n",
+        "fieldConfig": {
+          "defaults": {
+            "color": {
+              "mode": "palette-classic"
+            },
+            "custom": {
+              "axisBorderShow": false,
+              "axisCenteredZero": false,
+              "axisColorMode": "text",
+              "axisLabel": "",
+              "axisPlacement": "auto",
+              "barAlignment": 0,
+              "drawStyle": "line",
+              "fillOpacity": 20,
+              "gradientMode": "hue",
+              "hideFrom": {
+                "legend": false,
+                "tooltip": false,
+                "viz": false
+              },
+              "insertNulls": false,
+              "lineInterpolation": "linear",
+              "lineWidth": 1,
+              "pointSize": 5,
+              "scaleDistribution": {
+                "type": "linear"
+              },
+              "showPoints": "auto",
+              "spanNulls": false,
+              "stacking": {
+                "group": "A",
+                "mode": "normal"
+              },
+              "thresholdsStyle": {
+                "mode": "off"
+              }
+            },
+            "mappings": [],
+            "thresholds": {
+              "mode": "absolute",
+              "steps": [
+                {
+                  "color": "green",
+                  "value": null
+                },
+                {
+                  "color": "red",
+                  "value": 80
+                }
+              ]
+            }
+          },
+          "overrides": []
+        },
+        "gridPos": {
+          "h": 10,
+          "w": 8,
+          "x": 8,
+          "y": 23
+        },
+        "id": 11,
+        "options": {
+          "legend": {
+            "calcs": [],
+            "displayMode": "list",
+            "placement": "bottom",
+            "showLegend": true
+          },
+          "tooltip": {
+            "mode": "single",
+            "sort": "none"
+          }
+        },
+        "targets": [
+          {
+            "datasource": "${datasource}",
+            "expr": "rate(exporter_send_failed_spans_ratio_total{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\"}[$__rate_interval])\n",
+            "instant": false,
+            "legendFormat": "{{ pod }}",
+            "range": true,
+            "refId": "A"
+          }
+        ],
+        "title": "Exported failed spans",
+        "type": "timeseries"
+      }
+    ],
+    "refresh": "10s",
+    "schemaVersion": 39,
+    "tags": [
+      "alloy-mixin"
+    ],
+    "templating": {
+      "list": [
+        {
+          "current": {
+            "selected": false,
+            "text": "Mimir",
+            "value": "mimir_ds"
+          },
+          "hide": 0,
+          "includeAll": false,
+          "label": "Data Source",
+          "multi": false,
+          "name": "datasource",
+          "options": [],
+          "query": "prometheus",
+          "refresh": 1,
+          "regex": "",
+          "skipUrlSync": false,
+          "sort": 2,
+          "type": "datasource"
+        },
+        {
+          "current": {
+            "selected": false,
+            "text": "Loki",
+            "value": "loki_ds"
+          },
+          "hide": 0,
+          "includeAll": false,
+          "label": "Loki Data Source",
+          "multi": false,
+          "name": "loki_datasource",
+          "options": [],
+          "query": "loki",
+          "refresh": 1,
+          "regex": "",
+          "skipUrlSync": false,
+          "sort": 2,
+          "type": "datasource"
+        },
+        {
+          "current": {
+            "isNone": true,
+            "selected": false,
+            "text": "None",
+            "value": ""
+          },
+          "datasource": {
+            "uid": "${datasource}"
+          },
+          "definition": "",
+          "hide": 0,
+          "includeAll": false,
+          "label": "cluster",
+          "multi": false,
+          "name": "cluster",
+          "options": [],
+          "query": {
+            "query": "label_values(alloy_component_controller_running_components, cluster)\n",
+            "refId": "cluster"
+          },
+          "refresh": 2,
+          "regex": "",
+          "skipUrlSync": false,
+          "sort": 2,
+          "type": "query"
+        },
+        {
+          "current": {
+            "isNone": true,
+            "selected": false,
+            "text": "None",
+            "value": ""
+          },
+          "datasource": {
+            "uid": "${datasource}"
+          },
+          "definition": "",
+          "hide": 0,
+          "includeAll": false,
+          "label": "namespace",
+          "multi": false,
+          "name": "namespace",
+          "options": [],
+          "query": {
+            "query": "label_values(alloy_component_controller_running_components{cluster=\"$cluster\"}, namespace)\n",
+            "refId": "namespace"
+          },
+          "refresh": 2,
+          "regex": "",
+          "skipUrlSync": false,
+          "sort": 2,
+          "type": "query"
+        },
+        {
+          "allValue": ".*",
+          "current": {
+            "selected": false,
+            "text": "All",
+            "value": "$__all"
+          },
+          "datasource": {
+            "uid": "${datasource}"
+          },
+          "definition": "",
+          "hide": 0,
+          "includeAll": true,
+          "label": "instance",
+          "multi": true,
+          "name": "instance",
+          "options": [],
+          "query": {
+            "query": "label_values(alloy_component_controller_running_components{cluster=\"$cluster\", namespace=\"$namespace\"}, instance)\n",
+            "refId": "instance"
+          },
+          "refresh": 2,
+          "regex": "",
+          "skipUrlSync": false,
+          "sort": 2,
+          "type": "query"
+        }
+      ]
+    },
+    "time": {
+      "from": "now-1h",
+      "to": "now"
+    },
+    "timepicker": {
+      "refresh_intervals": [
+        "5s",
+        "10s",
+        "30s",
+        "1m",
+        "5m",
+        "15m",
+        "30m",
+        "1h",
+        "2h",
+        "1d"
+      ],
+      "time_options": [
+        "5m",
+        "15m",
+        "1h",
+        "6h",
+        "12h",
+        "24h",
+        "2d",
+        "7d",
+        "30d",
+        "90d"
+      ]
+    },
+    "timezone": "",
+    "title": "Alloy / OpenTelemetry",
+    "uid": "9b6d37c8603e19e8922133984faad93d",
+    "version": 0,
+    "weekStart": ""
+  }
--- a/charts/meta-monitoring/src/dashboards/alloy-prometheus.json
+++ b/charts/meta-monitoring/src/dashboards/alloy-prometheus.json
--- a/charts/meta-monitoring/src/dashboards/alloy-resources.json
+++ b/charts/meta-monitoring/src/dashboards/alloy-resources.json
@@ -0,0 +1,840 @@
+{
+    "annotations": {
+      "list": [
+        {
+          "builtIn": 1,
+          "datasource": {
+            "type": "grafana",
+            "uid": "-- Grafana --"
+          },
+          "enable": true,
+          "hide": true,
+          "iconColor": "rgba(0, 211, 255, 1)",
+          "name": "Annotations & Alerts",
+          "type": "dashboard"
+        },
+        {
+          "datasource": "$loki_datasource",
+          "enable": true,
+          "expr": "{cluster=\"$cluster\", container=\"kube-diff-logger\"} | json | namespace_extracted=\"alloy\" | name_extracted=~\"alloy.*\"",
+          "iconColor": "rgba(0, 211, 255, 1)",
+          "instant": false,
+          "name": "Deployments",
+          "titleFormat": "{{cluster}}/{{namespace}}"
+        }
+      ]
+    },
+    "editable": true,
+    "fiscalYearStartMonth": 0,
+    "graphTooltip": 1,
+    "id": 26,
+    "links": [
+      {
+        "asDropdown": true,
+        "icon": "external link",
+        "includeVars": true,
+        "keepTime": true,
+        "tags": [
+          "alloy-mixin"
+        ],
+        "targetBlank": false,
+        "title": "Dashboards",
+        "type": "dashboards"
+      }
+    ],
+    "panels": [
+      {
+        "datasource": "${datasource}",
+        "description": "CPU usage of the Alloy process relative to 1 CPU core.\n\nFor example, 100% means using one entire CPU core.\n",
+        "fieldConfig": {
+          "defaults": {
+            "color": {
+              "mode": "palette-classic"
+            },
+            "custom": {
+              "axisBorderShow": false,
+              "axisCenteredZero": false,
+              "axisColorMode": "text",
+              "axisLabel": "",
+              "axisPlacement": "auto",
+              "barAlignment": 0,
+              "drawStyle": "line",
+              "fillOpacity": 0,
+              "gradientMode": "none",
+              "hideFrom": {
+                "legend": false,
+                "tooltip": false,
+                "viz": false
+              },
+              "insertNulls": false,
+              "lineInterpolation": "linear",
+              "lineWidth": 1,
+              "pointSize": 5,
+              "scaleDistribution": {
+                "type": "linear"
+              },
+              "showPoints": "auto",
+              "spanNulls": false,
+              "stacking": {
+                "group": "A",
+                "mode": "none"
+              },
+              "thresholdsStyle": {
+                "mode": "off"
+              }
+            },
+            "mappings": [],
+            "thresholds": {
+              "mode": "absolute",
+              "steps": [
+                {
+                  "color": "green",
+                  "value": null
+                },
+                {
+                  "color": "red",
+                  "value": 80
+                }
+              ]
+            },
+            "unit": "percentunit"
+          },
+          "overrides": []
+        },
+        "gridPos": {
+          "h": 8,
+          "w": 12,
+          "x": 0,
+          "y": 0
+        },
+        "id": 1,
+        "options": {
+          "legend": {
+            "calcs": [],
+            "displayMode": "list",
+            "placement": "bottom",
+            "showLegend": true
+          },
+          "tooltip": {
+            "mode": "single",
+            "sort": "none"
+          }
+        },
+        "targets": [
+          {
+            "datasource": "${datasource}",
+            "expr": "rate(alloy_resources_process_cpu_seconds_total{cluster=\"$cluster\",namespace=\"$namespace\",instance=~\"$instance\"}[$__rate_interval])",
+            "instant": false,
+            "legendFormat": "{{instance}}",
+            "range": true,
+            "refId": "A"
+          }
+        ],
+        "title": "CPU usage",
+        "type": "timeseries"
+      },
+      {
+        "datasource": "${datasource}",
+        "description": "Resident memory size of the Alloy process.\n",
+        "fieldConfig": {
+          "defaults": {
+            "color": {
+              "mode": "palette-classic"
+            },
+            "custom": {
+              "axisBorderShow": false,
+              "axisCenteredZero": false,
+              "axisColorMode": "text",
+              "axisLabel": "",
+              "axisPlacement": "auto",
+              "barAlignment": 0,
+              "drawStyle": "line",
+              "fillOpacity": 0,
+              "gradientMode": "none",
+              "hideFrom": {
+                "legend": false,
+                "tooltip": false,
+                "viz": false
+              },
+              "insertNulls": false,
+              "lineInterpolation": "linear",
+              "lineWidth": 1,
+              "pointSize": 5,
+              "scaleDistribution": {
+                "type": "linear"
+              },
+              "showPoints": "auto",
+              "spanNulls": false,
+              "stacking": {
+                "group": "A",
+                "mode": "none"
+              },
+              "thresholdsStyle": {
+                "mode": "off"
+              }
+            },
+            "mappings": [],
+            "thresholds": {
+              "mode": "absolute",
+              "steps": [
+                {
+                  "color": "green",
+                  "value": null
+                },
+                {
+                  "color": "red",
+                  "value": 80
+                }
+              ]
+            },
+            "unit": "decbytes"
+          },
+          "overrides": []
+        },
+        "gridPos": {
+          "h": 8,
+          "w": 12,
+          "x": 12,
+          "y": 0
+        },
+        "id": 2,
+        "options": {
+          "legend": {
+            "calcs": [],
+            "displayMode": "list",
+            "placement": "bottom",
+            "showLegend": true
+          },
+          "tooltip": {
+            "mode": "single",
+            "sort": "none"
+          }
+        },
+        "targets": [
+          {
+            "datasource": "${datasource}",
+            "expr": "alloy_resources_process_resident_memory_bytes{cluster=\"$cluster\",namespace=\"$namespace\",instance=~\"$instance\"}",
+            "instant": false,
+            "legendFormat": "{{instance}}",
+            "range": true,
+            "refId": "A"
+          }
+        ],
+        "title": "Memory (RSS)",
+        "type": "timeseries"
+      },
+      {
+        "datasource": "${datasource}",
+        "description": "Rate at which the Alloy process performs garbage collections.\n",
+        "fieldConfig": {
+          "defaults": {
+            "color": {
+              "mode": "palette-classic"
+            },
+            "custom": {
+              "axisBorderShow": false,
+              "axisCenteredZero": false,
+              "axisColorMode": "text",
+              "axisLabel": "",
+              "axisPlacement": "auto",
+              "barAlignment": 0,
+              "drawStyle": "points",
+              "fillOpacity": 0,
+              "gradientMode": "none",
+              "hideFrom": {
+                "legend": false,
+                "tooltip": false,
+                "viz": false
+              },
+              "insertNulls": false,
+              "lineInterpolation": "linear",
+              "lineWidth": 1,
+              "pointSize": 3,
+              "scaleDistribution": {
+                "type": "linear"
+              },
+              "showPoints": "auto",
+              "spanNulls": false,
+              "stacking": {
+                "group": "A",
+                "mode": "none"
+              },
+              "thresholdsStyle": {
+                "mode": "off"
+              }
+            },
+            "mappings": [],
+            "thresholds": {
+              "mode": "absolute",
+              "steps": [
+                {
+                  "color": "green",
+                  "value": null
+                },
+                {
+                  "color": "red",
+                  "value": 80
+                }
+              ]
+            },
+            "unit": "ops"
+          },
+          "overrides": []
+        },
+        "gridPos": {
+          "h": 8,
+          "w": 8,
+          "x": 0,
+          "y": 8
+        },
+        "id": 3,
+        "options": {
+          "legend": {
+            "calcs": [],
+            "displayMode": "list",
+            "placement": "bottom",
+            "showLegend": true
+          },
+          "tooltip": {
+            "mode": "single",
+            "sort": "none"
+          }
+        },
+        "targets": [
+          {
+            "datasource": "${datasource}",
+            "expr": "rate(go_gc_duration_seconds_count{cluster=\"$cluster\",namespace=\"$namespace\",instance=~\"$instance\"}[5m])\nand on(instance)\nalloy_build_info{cluster=\"$cluster\",namespace=\"$namespace\",instance=~\"$instance\"}\n",
+            "instant": false,
+            "legendFormat": "{{instance}}",
+            "range": true,
+            "refId": "A"
+          }
+        ],
+        "title": "Garbage collections",
+        "type": "timeseries"
+      },
+      {
+        "datasource": "${datasource}",
+        "description": "Number of goroutines that currently exist in the Alloy process. A count that\nkeeps growing without bound indicates a goroutine leak.\n",
+        "fieldConfig": {
+          "defaults": {
+            "color": {
+              "mode": "palette-classic"
+            },
+            "custom": {
+              "axisBorderShow": false,
+              "axisCenteredZero": false,
+              "axisColorMode": "text",
+              "axisLabel": "",
+              "axisPlacement": "auto",
+              "barAlignment": 0,
+              "drawStyle": "line",
+              "fillOpacity": 0,
+              "gradientMode": "none",
+              "hideFrom": {
+                "legend": false,
+                "tooltip": false,
+                "viz": false
+              },
+              "insertNulls": false,
+              "lineInterpolation": "linear",
+              "lineWidth": 1,
+              "pointSize": 5,
+              "scaleDistribution": {
+                "type": "linear"
+              },
+              "showPoints": "auto",
+              "spanNulls": false,
+              "stacking": {
+                "group": "A",
+                "mode": "none"
+              },
+              "thresholdsStyle": {
+                "mode": "off"
+              }
+            },
+            "mappings": [],
+            "thresholds": {
+              "mode": "absolute",
+              "steps": [
+                {
+                  "color": "green",
+                  "value": null
+                },
+                {
+                  "color": "red",
+                  "value": 80
+                }
+              ]
+            },
+            "unit": "none"
+          },
+          "overrides": []
+        },
+        "gridPos": {
+          "h": 8,
+          "w": 8,
+          "x": 8,
+          "y": 8
+        },
+        "id": 4,
+        "options": {
+          "legend": {
+            "calcs": [],
+            "displayMode": "list",
+            "placement": "bottom",
+            "showLegend": true
+          },
+          "tooltip": {
+            "mode": "single",
+            "sort": "none"
+          }
+        },
+        "targets": [
+          {
+            "datasource": "${datasource}",
+            "expr": "go_goroutines{cluster=\"$cluster\",namespace=\"$namespace\",instance=~\"$instance\"}\nand on(instance)\nalloy_build_info{cluster=\"$cluster\",namespace=\"$namespace\",instance=~\"$instance\"}\n",
+            "instant": false,
+            "legendFormat": "{{instance}}",
+            "range": true,
+            "refId": "A"
+          }
+        ],
+        "title": "Goroutines",
+        "type": "timeseries"
+      },
+      {
+        "datasource": "${datasource}",
+        "description": "Heap memory currently in use by the Alloy process.\n",
+        "fieldConfig": {
+          "defaults": {
+            "color": {
+              "mode": "palette-classic"
+            },
+            "custom": {
+              "axisBorderShow": false,
+              "axisCenteredZero": false,
+              "axisColorMode": "text",
+              "axisLabel": "",
+              "axisPlacement": "auto",
+              "barAlignment": 0,
+              "drawStyle": "line",
+              "fillOpacity": 0,
+              "gradientMode": "none",
+              "hideFrom": {
+                "legend": false,
+                "tooltip": false,
+                "viz": false
+              },
+              "insertNulls": false,
+              "lineInterpolation": "linear",
+              "lineWidth": 1,
+              "pointSize": 5,
+              "scaleDistribution": {
+                "type": "linear"
+              },
+              "showPoints": "auto",
+              "spanNulls": false,
+              "stacking": {
+                "group": "A",
+                "mode": "none"
+              },
+              "thresholdsStyle": {
+                "mode": "off"
+              }
+            },
+            "mappings": [],
+            "thresholds": {
+              "mode": "absolute",
+              "steps": [
+                {
+                  "color": "green",
+                  "value": null
+                },
+                {
+                  "color": "red",
+                  "value": 80
+                }
+              ]
+            },
+            "unit": "decbytes"
+          },
+          "overrides": []
+        },
+        "gridPos": {
+          "h": 8,
+          "w": 8,
+          "x": 16,
+          "y": 8
+        },
+        "id": 5,
+        "options": {
+          "legend": {
+            "calcs": [],
+            "displayMode": "list",
+            "placement": "bottom",
+            "showLegend": true
+          },
+          "tooltip": {
+            "mode": "single",
+            "sort": "none"
+          }
+        },
+        "targets": [
+          {
+            "datasource": "${datasource}",
+            "expr": "go_memstats_heap_inuse_bytes{cluster=\"$cluster\",namespace=\"$namespace\",instance=~\"$instance\"}\nand on(instance)\nalloy_build_info{cluster=\"$cluster\",namespace=\"$namespace\",instance=~\"$instance\"}\n",
+            "instant": false,
+            "legendFormat": "{{instance}}",
+            "range": true,
+            "refId": "A"
+          }
+        ],
+        "title": "Memory (heap inuse)",
+        "type": "timeseries"
+      },
+      {
+        "datasource": "${datasource}",
+        "description": "Rate of data received across all network interfaces for the machine\nAlloy is running on.\n\nData shown here is across all running processes and not exclusive to\nthe running Alloy process.\n",
+        "fieldConfig": {
+          "defaults": {
+            "color": {
+              "mode": "palette-classic"
+            },
+            "custom": {
+              "axisBorderShow": false,
+              "axisCenteredZero": false,
+              "axisColorMode": "text",
+              "axisLabel": "",
+              "axisPlacement": "auto",
+              "barAlignment": 0,
+              "drawStyle": "line",
+              "fillOpacity": 30,
+              "gradientMode": "none",
+              "hideFrom": {
+                "legend": false,
+                "tooltip": false,
+                "viz": false
+              },
+              "insertNulls": false,
+              "lineInterpolation": "linear",
+              "lineWidth": 1,
+              "pointSize": 5,
+              "scaleDistribution": {
+                "type": "linear"
+              },
+              "showPoints": "auto",
+              "spanNulls": false,
+              "stacking": {
+                "group": "A",
+                "mode": "normal"
+              },
+              "thresholdsStyle": {
+                "mode": "off"
+              }
+            },
+            "mappings": [],
+            "thresholds": {
+              "mode": "absolute",
+              "steps": [
+                {
+                  "color": "green",
+                  "value": null
+                },
+                {
+                  "color": "red",
+                  "value": 80
+                }
+              ]
+            },
+            "unit": "Bps"
+          },
+          "overrides": []
+        },
+        "gridPos": {
+          "h": 8,
+          "w": 12,
+          "x": 0,
+          "y": 16
+        },
+        "id": 6,
+        "options": {
+          "legend": {
+            "calcs": [],
+            "displayMode": "list",
+            "placement": "bottom",
+            "showLegend": true
+          },
+          "tooltip": {
+            "mode": "single",
+            "sort": "none"
+          }
+        },
+        "targets": [
+          {
+            "datasource": "${datasource}",
+            "expr": "rate(alloy_resources_machine_rx_bytes_total{cluster=\"$cluster\",namespace=\"$namespace\",instance=~\"$instance\"}[$__rate_interval])\n",
+            "instant": false,
+            "legendFormat": "{{instance}}",
+            "range": true,
+            "refId": "A"
+          }
+        ],
+        "title": "Network receive bandwidth",
+        "type": "timeseries"
+      },
+      {
+        "datasource": "${datasource}",
+        "description": "Rate of data sent across all network interfaces for the machine\nAlloy is running on.\n\nData shown here is across all running processes and not exclusive to\nthe running Alloy process.\n",
+        "fieldConfig": {
+          "defaults": {
+            "color": {
+              "mode": "palette-classic"
+            },
+            "custom": {
+              "axisBorderShow": false,
+              "axisCenteredZero": false,
+              "axisColorMode": "text",
+              "axisLabel": "",
+              "axisPlacement": "auto",
+              "barAlignment": 0,
+              "drawStyle": "line",
+              "fillOpacity": 30,
+              "gradientMode": "none",
+              "hideFrom": {
+                "legend": false,
+                "tooltip": false,
+                "viz": false
+              },
+              "insertNulls": false,
+              "lineInterpolation": "linear",
+              "lineWidth": 1,
+              "pointSize": 5,
+              "scaleDistribution": {
+                "type": "linear"
+              },
+              "showPoints": "auto",
+              "spanNulls": false,
+              "stacking": {
+                "group": "A",
+                "mode": "normal"
+              },
+              "thresholdsStyle": {
+                "mode": "off"
+              }
+            },
+            "mappings": [],
+            "thresholds": {
+              "mode": "absolute",
+              "steps": [
+                {
+                  "color": "green",
+                  "value": null
+                },
+                {
+                  "color": "red",
+                  "value": 80
+                }
+              ]
+            },
+            "unit": "Bps"
+          },
+          "overrides": []
+        },
+        "gridPos": {
+          "h": 8,
+          "w": 12,
+          "x": 12,
+          "y": 16
+        },
+        "id": 7,
+        "options": {
+          "legend": {
+            "calcs": [],
+            "displayMode": "list",
+            "placement": "bottom",
+            "showLegend": true
+          },
+          "tooltip": {
+            "mode": "single",
+            "sort": "none"
+          }
+        },
+        "targets": [
+          {
+            "datasource": "${datasource}",
+            "expr": "rate(alloy_resources_machine_tx_bytes_total{cluster=\"$cluster\",namespace=\"$namespace\",instance=~\"$instance\"}[$__rate_interval])\n",
+            "instant": false,
+            "legendFormat": "{{instance}}",
+            "range": true,
+            "refId": "A"
+          }
+        ],
+        "title": "Network send bandwidth",
+        "type": "timeseries"
+      }
+    ],
+    "refresh": "10s",
+    "schemaVersion": 39,
+    "tags": [
+      "alloy-mixin"
+    ],
+    "templating": {
+      "list": [
+        {
+          "current": {
+            "selected": false,
+            "text": "Mimir",
+            "value": "mimir_ds"
+          },
+          "hide": 0,
+          "includeAll": false,
+          "label": "Data Source",
+          "multi": false,
+          "name": "datasource",
+          "options": [],
+          "query": "prometheus",
+          "refresh": 1,
+          "regex": "",
+          "skipUrlSync": false,
+          "sort": 2,
+          "type": "datasource"
+        },
+        {
+          "current": {
+            "selected": false,
+            "text": "Loki",
+            "value": "loki_ds"
+          },
+          "hide": 0,
+          "includeAll": false,
+          "label": "Loki Data Source",
+          "multi": false,
+          "name": "loki_datasource",
+          "options": [],
+          "query": "loki",
+          "refresh": 1,
+          "regex": "",
+          "skipUrlSync": false,
+          "sort": 2,
+          "type": "datasource"
+        },
+        {
+          "current": {
+            "isNone": true,
+            "selected": false,
+            "text": "None",
+            "value": ""
+          },
+          "datasource": {
+            "uid": "${datasource}"
+          },
+          "definition": "",
+          "hide": 0,
+          "includeAll": false,
+          "label": "cluster",
+          "multi": false,
+          "name": "cluster",
+          "options": [],
+          "query": {
+            "query": "label_values(alloy_component_controller_running_components, cluster)\n",
+            "refId": "cluster"
+          },
+          "refresh": 2,
+          "regex": "",
+          "skipUrlSync": false,
+          "sort": 2,
+          "type": "query"
+        },
+        {
+          "current": {
+            "isNone": true,
+            "selected": false,
+            "text": "None",
+            "value": ""
+          },
+          "datasource": {
+            "uid": "${datasource}"
+          },
+          "definition": "",
+          "hide": 0,
+          "includeAll": false,
+          "label": "namespace",
+          "multi": false,
+          "name": "namespace",
+          "options": [],
+          "query": {
+            "query": "label_values(alloy_component_controller_running_components{cluster=\"$cluster\"}, namespace)\n",
+            "refId": "namespace"
+          },
+          "refresh": 2,
+          "regex": "",
+          "skipUrlSync": false,
+          "sort": 2,
+          "type": "query"
+        },
+        {
+          "allValue": ".*",
+          "current": {
+            "selected": false,
+            "text": "All",
+            "value": "$__all"
+          },
+          "datasource": {
+            "uid": "${datasource}"
+          },
+          "definition": "",
+          "hide": 0,
+          "includeAll": true,
+          "label": "instance",
+          "multi": true,
+          "name": "instance",
+          "options": [],
+          "query": {
+            "query": "label_values(alloy_component_controller_running_components{cluster=\"$cluster\", namespace=\"$namespace\"}, instance)\n",
+            "refId": "instance"
+          },
+          "refresh": 2,
+          "regex": "",
+          "skipUrlSync": false,
+          "sort": 2,
+          "type": "query"
+        }
+      ]
+    },
+    "time": {
+      "from": "now-1h",
+      "to": "now"
+    },
+    "timepicker": {
+      "refresh_intervals": [
+        "5s",
+        "10s",
+        "30s",
+        "1m",
+        "5m",
+        "15m",
+        "30m",
+        "1h",
+        "2h",
+        "1d"
+      ],
+      "time_options": [
+        "5m",
+        "15m",
+        "1h",
+        "6h",
+        "12h",
+        "24h",
+        "2d",
+        "7d",
+        "30d",
+        "90d"
+      ]
+    },
+    "timezone": "",
+    "title": "Alloy / Resources",
+    "uid": "d6a8574c31f3d7cb8f1345ec84d15a67",
+    "version": 0,
+    "weekStart": ""
+  }
--- a/charts/meta-monitoring/src/dashboards/loki-operational.json
+++ b/charts/meta-monitoring/src/dashboards/loki-operational.json
@@ -1824,7 +1824,7 @@
                  "steppedLine": false,
                  "targets": [
                     {
-                        "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", pod=~\"(.*distributor.*|(loki|enterprise-logs)-write)\"}[$__rate_interval]))",
+                        "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", pod=~\"(.*distributor.*|(loki|enterprise-logs)-write.*|$namespace-[0-9]+)\"}[$__rate_interval]))",
                        "intervalFactor": 3,
                        "legendFormat": "{{pod}}",
                        "refId": "A"
@@ -1921,7 +1921,7 @@
                  "steppedLine": false,
                  "targets": [
                     {
-                        "expr": "go_memstats_heap_inuse_bytes{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"(.*/distributor|(loki|enterprise-logs)-write|.*/loki)\"}",
+                        "expr": "go_memstats_heap_inuse_bytes{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"(.*/.*distributor|$namespace/(loki|enterprise-logs)-write|.*/loki|$namespace/loki-single-binary)\"}",
                        "instant": false,
                        "intervalFactor": 3,
                        "legendFormat": "{{pod}}",
@@ -2525,7 +2525,7 @@
                  "steppedLine": false,
                  "targets": [
                     {
-                        "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", pod=~\"(.*ingester.*|(loki|enterprise-logs)-write|loki-single-binary)\"}[$__rate_interval]))",
+                        "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", pod=~\"(.*ingester.*|(loki|enterprise-logs)-write.*|loki-single-binary|$namespace-[0-9]+)\"}[$__rate_interval]))",
                        "intervalFactor": 3,
                        "legendFormat": "{{pod}}",
                        "refId": "A"
@@ -2622,7 +2622,7 @@
                  "steppedLine": false,
                  "targets": [
                     {
-                        "expr": "go_memstats_heap_inuse_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"(.*ingester.*|(loki|enterprise-logs)-write|loki-single-binary)\"}",
+                        "expr": "go_memstats_heap_inuse_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"(.*ingester.*|(loki|enterprise-logs)-write.*|loki-single-binary|$namespace-[0-9]+)\"}",
                        "instant": false,
                        "intervalFactor": 3,
                        "legendFormat": "{{pod}}",
@@ -3308,7 +3308,7 @@
                  "steppedLine": false,
                  "targets": [
                     {
-                        "expr": "sum by(reason) (rate(loki_ingester_chunks_flushed_total{cluster=~\"$cluster\",job=~\"$namespace/.*ingester.*\", namespace=~\"$namespace\"}[$__rate_interval])) / ignoring(reason) group_left sum(rate(loki_ingester_chunks_flushed_total{cluster=~\"$cluster\",job=~\"$namespace/.*ingester.*\", namespace=~\"$namespace\"}[$__rate_interval]))",
+                        "expr": "sum by(reason) (rate(loki_ingester_chunks_flushed_total{cluster=~\"$cluster\",job=~\"($namespace)/(.*ingester.*|(loki|enterprise-logs)-write|loki-single-binary)\", namespace=~\"$namespace\"}[$__rate_interval])) / ignoring(reason) group_left sum(rate(loki_ingester_chunks_flushed_total{cluster=~\"$cluster\",job=~\"($namespace)/(.*ingester.*|(loki|enterprise-logs)-write|loki-single-binary)\", namespace=~\"$namespace\"}[$__rate_interval]))",
                        "interval": "",
                        "legendFormat": "{{ reason }}"
                     }
@@ -3388,7 +3388,7 @@
                  "reverseYBuckets": false,
                  "targets": [
                     {
-                        "expr": "sum by (le) (rate(loki_ingester_chunk_utilization_bucket{cluster=\"$cluster\", job=~\"($namespace)/(ingester|(loki|enterprise-logs)-write|loki-single-binary)\"}[$__rate_interval]))",
+                        "expr": "sum by (le) (rate(loki_ingester_chunk_utilization_bucket{cluster=\"$cluster\", job=~\"($namespace)/(.*ingester|(loki|enterprise-logs)-write|loki-single-binary)\"}[$__rate_interval]))",
                        "format": "heatmap",
                        "instant": false,
                        "interval": "",
@@ -3481,7 +3481,7 @@
                  "steppedLine": false,
                  "targets": [
                     {
-                        "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", pod=~\"(.*querier.*|(loki|enterprise-logs)-read|loki-single-binary)\"}[$__rate_interval]))",
+                        "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", pod=~\"(.*querier.*|(loki|enterprise-logs)-read.*|loki-single-binary|$namespace-[0-9]+)\"}[$__rate_interval]))",
                        "intervalFactor": 3,
                        "legendFormat": "{{pod}}",
                        "refId": "A"
@@ -3578,7 +3578,7 @@
                  "steppedLine": false,
                  "targets": [
                     {
-                        "expr": "go_memstats_heap_inuse_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"(.*querier.*|(loki|enterprise-logs)-read|.*loki-single-binary)\"}",
+                        "expr": "go_memstats_heap_inuse_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"(.*querier.*|(loki|enterprise-logs)-read.*|.*loki-single-binary|$namespace-[0-9]+)\"}",
                        "instant": false,
                        "intervalFactor": 3,
                        "legendFormat": "{{pod}}",
--- a/charts/meta-monitoring/src/dashboards/loki-reads-resources.json
+++ b/charts/meta-monitoring/src/dashboards/loki-reads-resources.json
@@ -104,19 +104,19 @@
                  "span": 4,
                  "targets": [
                     {
-                        "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-frontend\"}[$__rate_interval]))",
+                        "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-frontend|loki\", pod=~\"query-frontend|loki-read-.*|$namespace-[0-9]*\"}[$__rate_interval]))",
                        "format": "time_series",
                        "legendFormat": "{{pod}}",
                        "legendLink": null
                     },
                     {
-                        "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-frontend\", resource=\"cpu\"} > 0)",
+                        "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-frontend|loki\", pod=~\"query-frontend|loki-read-.*|$namespace-[0-9]*\", resource=\"cpu\"} > 0)",
                        "format": "time_series",
                        "legendFormat": "request",
                        "legendLink": null
                     },
                     {
-                        "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-frontend\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-frontend\"})",
+                        "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-frontend|loki\", pod=~\"query-frontend|loki-read-.*|$namespace-[0-9]*\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-frontend|loki\", pod=~\"query-frontend|loki-read-.*|$namespace-[0-9]*\"})",
                        "format": "time_series",
                        "legendFormat": "limit",
                        "legendLink": null
@@ -206,19 +206,19 @@
                  "span": 4,
                  "targets": [
                     {
-                        "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-frontend\"})",
+                        "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-frontend|loki\", pod=~\"query-frontend|loki-read-.*|$namespace-[0-9]*\"})",
                        "format": "time_series",
                        "legendFormat": "{{pod}}",
                        "legendLink": null
                     },
                     {
-                        "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-frontend\", resource=\"memory\"} > 0)",
+                        "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-frontend|loki\", pod=~\"query-frontend|loki-read-.*|$namespace-[0-9]*\", resource=\"memory\"} > 0)",
                        "format": "time_series",
                        "legendFormat": "request",
                        "legendLink": null
                     },
                     {
-                        "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-frontend\"} > 0)",
+                        "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-frontend|loki\", pod=~\"query-frontend|loki-read-.*|$namespace-[0-9]*\"} > 0)",
                        "format": "time_series",
                        "legendFormat": "limit",
                        "legendLink": null
@@ -269,7 +269,7 @@
                  "span": 4,
                  "targets": [
                     {
-                        "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/.*query-frontend\"})",
+                        "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/(.*query-frontend|loki-read|loki-single-binary)\"})",
                        "format": "time_series",
                        "legendFormat": "{{pod}}",
                        "legendLink": null
@@ -371,19 +371,19 @@
                  "span": 4,
                  "targets": [
                     {
-                        "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-scheduler\"}[$__rate_interval]))",
+                        "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-scheduler|loki\", pod=~\"query-scheduler|loki-read-.*|$namespace-[0-9]*\"}[$__rate_interval]))",
                        "format": "time_series",
                        "legendFormat": "{{pod}}",
                        "legendLink": null
                     },
                     {
-                        "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-scheduler\", resource=\"cpu\"} > 0)",
+                        "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-scheduler|loki\", pod=~\"query-scheduler|loki-read-.*|$namespace-[0-9]*\", resource=\"cpu\"} > 0)",
                        "format": "time_series",
                        "legendFormat": "request",
                        "legendLink": null
                     },
                     {
-                        "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-scheduler\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-scheduler\"})",
+                        "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-scheduler|loki\", pod=~\"query-scheduler|loki-read-.*|$namespace-[0-9]*\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-scheduler|loki\", pod=~\"query-scheduler|loki-read-.*|$namespace-[0-9]*\"})",
                        "format": "time_series",
                        "legendFormat": "limit",
                        "legendLink": null
@@ -473,19 +473,19 @@
                  "span": 4,
                  "targets": [
                     {
-                        "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-scheduler\"})",
+                        "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-scheduler|loki\", pod=~\"query-scheduler|loki-read-.*|$namespace-[0-9]*\"})",
                        "format": "time_series",
                        "legendFormat": "{{pod}}",
                        "legendLink": null
                     },
                     {
-                        "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-scheduler\", resource=\"memory\"} > 0)",
+                        "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-scheduler|loki\", pod=~\"query-scheduler|loki-read-.*|$namespace-[0-9]*\", resource=\"memory\"} > 0)",
                        "format": "time_series",
                        "legendFormat": "request",
                        "legendLink": null
                     },
                     {
-                        "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-scheduler\"} > 0)",
+                        "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-scheduler|loki\", pod=~\"query-scheduler|loki-read-.*|$namespace-[0-9]*\"} > 0)",
                        "format": "time_series",
                        "legendFormat": "limit",
                        "legendLink": null
@@ -536,7 +536,7 @@
                  "span": 4,
                  "targets": [
                     {
-                        "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/.*query-scheduler\"})",
+                        "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/(.*query-scheduler|loki-read|loki-single-binary)\"})",
                        "format": "time_series",
                        "legendFormat": "{{pod}}",
                        "legendLink": null
@@ -638,19 +638,19 @@
                  },
                  "targets": [
                     {
-                        "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"querier\"}[$__rate_interval]))",
+                        "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"querier|loki\", pod=~\"query-frontend|loki-read-.*|$namespace-[0-9]*\"}[$__rate_interval]))",
                        "format": "time_series",
                        "legendFormat": "{{pod}}",
                        "legendLink": null
                     },
                     {
-                        "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"querier\", resource=\"cpu\"} > 0)",
+                        "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"querier|loki\", pod=~\"query-frontend|loki-read-.*|$namespace-[0-9]*\", resource=\"cpu\"} > 0)",
                        "format": "time_series",
                        "legendFormat": "request",
                        "legendLink": null
                     },
                     {
-                        "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"querier\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"querier\"})",
+                        "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"querier|loki\", pod=~\"query-frontend|loki-read-.*|$namespace-[0-9]*\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"querier|loki\", pod=~\"query-frontend|loki-read-.*|$namespace-[0-9]*\"})",
                        "format": "time_series",
                        "legendFormat": "limit",
                        "legendLink": null
@@ -740,19 +740,19 @@
                  },
                  "targets": [
                     {
-                        "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"querier\"})",
+                        "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"querier|loki\", pod=~\"query-frontend|loki-read-.*|$namespace-[0-9]*\"})",
                        "format": "time_series",
                        "legendFormat": "{{pod}}",
                        "legendLink": null
                     },
                     {
-                        "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"querier\", resource=\"memory\"} > 0)",
+                        "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"querier|loki\", pod=~\"query-frontend|loki-read-.*|$namespace-[0-9]*\", resource=\"memory\"} > 0)",
                        "format": "time_series",
                        "legendFormat": "request",
                        "legendLink": null
                     },
                     {
-                        "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"querier\"} > 0)",
+                        "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"querier|loki\", pod=~\"query-frontend|loki-read-.*|$namespace-[0-9]*\"} > 0)",
                        "format": "time_series",
                        "legendFormat": "limit",
                        "legendLink": null
@@ -803,7 +803,7 @@
                  },
                  "targets": [
                     {
-                        "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/.*querier\"})",
+                        "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/(.*querier|loki-read|loki-single-binary)\"})",
                        "format": "time_series",
                        "legendFormat": "{{pod}}",
                        "legendLink": null
@@ -854,7 +854,7 @@
                  },
                  "targets": [
                     {
-                        "expr": "sum by(instance, pod, device) (rate(node_disk_written_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) (container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"querier\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 0)\n",
+                        "expr": "sum by(instance, pod, device) (rate(node_disk_written_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) (container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"querier|loki\", pod=~\"query-frontend|loki-read-.*|$namespace-[0-9]*\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 0)\n",
                        "format": "time_series",
                        "legendFormat": "{{pod}} - {{device}}",
                        "legendLink": null
@@ -902,7 +902,7 @@
                  },
                  "targets": [
                     {
-                        "expr": "sum by(instance, pod, device) (rate(node_disk_read_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) (container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"querier\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 0)\n",
+                        "expr": "sum by(instance, pod, device) (rate(node_disk_read_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) (container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"querier|loki\", pod=~\"query-frontend|loki-read-.*|$namespace-[0-9]*\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 0)\n",
                        "format": "time_series",
                        "legendFormat": "{{pod}} - {{device}}",
                        "legendLink": null
@@ -1462,19 +1462,19 @@
                  },
                  "targets": [
                     {
-                        "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"bloom-gateway\"}[$__rate_interval]))",
+                        "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"bloom-gateway|loki\", pod=~\"query-frontend|loki-read-.*|$namespace-[0-9]*\"}[$__rate_interval]))",
                        "format": "time_series",
                        "legendFormat": "{{pod}}",
                        "legendLink": null
                     },
                     {
-                        "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"bloom-gateway\", resource=\"cpu\"} > 0)",
+                        "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"bloom-gateway|loki\", pod=~\"query-frontend|loki-read-.*|$namespace-[0-9]*\", resource=\"cpu\"} > 0)",
                        "format": "time_series",
                        "legendFormat": "request",
                        "legendLink": null
                     },
                     {
-                        "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"bloom-gateway\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"bloom-gateway\"})",
+                        "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"bloom-gateway|loki\", pod=~\"query-frontend|loki-read-.*|$namespace-[0-9]*\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"bloom-gateway|loki\", pod=~\"query-frontend|loki-read-.*|$namespace-[0-9]*\"})",
                        "format": "time_series",
                        "legendFormat": "limit",
                        "legendLink": null
@@ -1564,19 +1564,19 @@
                  },
                  "targets": [
                     {
-                        "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"bloom-gateway\"})",
+                        "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"bloom-gateway|loki\", pod=~\"query-frontend|loki-read-.*|$namespace-[0-9]*\"})",
                        "format": "time_series",
                        "legendFormat": "{{pod}}",
                        "legendLink": null
                     },
                     {
-                        "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"bloom-gateway\", resource=\"memory\"} > 0)",
+                        "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"bloom-gateway|loki\", pod=~\"query-frontend|loki-read-.*|$namespace-[0-9]*\", resource=\"memory\"} > 0)",
                        "format": "time_series",
                        "legendFormat": "request",
                        "legendLink": null
                     },
                     {
-                        "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"bloom-gateway\"} > 0)",
+                        "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"bloom-gateway|loki\", pod=~\"query-frontend|loki-read-.*|$namespace-[0-9]*\"} > 0)",
                        "format": "time_series",
                        "legendFormat": "limit",
                        "legendLink": null
@@ -1627,7 +1627,7 @@
                  },
                  "targets": [
                     {
-                        "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/.*bloom-gateway\"})",
+                        "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/(.*bloom-gateway|loki-read|loki-single-binary)\"})",
                        "format": "time_series",
                        "legendFormat": "{{pod}}",
                        "legendLink": null
@@ -1678,7 +1678,7 @@
                  },
                  "targets": [
                     {
-                        "expr": "sum by(instance, pod, device) (rate(node_disk_written_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) (container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"bloom-gateway\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 0)\n",
+                        "expr": "sum by(instance, pod, device) (rate(node_disk_written_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) (container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"bloom-gateway|loki\", pod=~\"query-frontend|loki-read-.*|$namespace-[0-9]*\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 0)\n",
                        "format": "time_series",
                        "legendFormat": "{{pod}} - {{device}}",
                        "legendLink": null
@@ -1726,7 +1726,7 @@
                  },
                  "targets": [
                     {
-                        "expr": "sum by(instance, pod, device) (rate(node_disk_read_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) (container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"bloom-gateway\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 0)\n",
+                        "expr": "sum by(instance, pod, device) (rate(node_disk_read_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) (container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"bloom-gateway|loki\", pod=~\"query-frontend|loki-read-.*|$namespace-[0-9]*\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 0)\n",
                        "format": "time_series",
                        "legendFormat": "{{pod}} - {{device}}",
                        "legendLink": null
@@ -2189,19 +2189,19 @@
                  },
                  "targets": [
                     {
-                        "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"ruler\"}[$__rate_interval]))",
+                        "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"ruler|loki\", pod=~\"ruler|loki-backend-.*|$namespace-[0-9]*\"}[$__rate_interval]))",
                        "format": "time_series",
                        "legendFormat": "{{pod}}",
                        "legendLink": null
                     },
                     {
-                        "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"ruler\", resource=\"cpu\"} > 0)",
+                        "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"ruler|loki\", pod=~\"ruler|loki-backend-.*|$namespace-[0-9]*\", resource=\"cpu\"} > 0)",
                        "format": "time_series",
                        "legendFormat": "request",
                        "legendLink": null
                     },
                     {
-                        "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"ruler\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"ruler\"})",
+                        "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"ruler|loki\", pod=~\"ruler|loki-backend-.*|$namespace-[0-9]*\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"ruler|loki\", pod=~\"ruler|loki-backend-.*|$namespace-[0-9]*\"})",
                        "format": "time_series",
                        "legendFormat": "limit",
                        "legendLink": null
@@ -2291,19 +2291,19 @@
                  },
                  "targets": [
                     {
-                        "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"ruler\"})",
+                        "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"ruler|loki\", pod=~\"ruler|loki-backend-.*|$namespace-[0-9]*\"})",
                        "format": "time_series",
                        "legendFormat": "{{pod}}",
                        "legendLink": null
                     },
                     {
-                        "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"ruler\", resource=\"memory\"} > 0)",
+                        "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"ruler|loki\", pod=~\"ruler|loki-backend-.*|$namespace-[0-9]*\", resource=\"memory\"} > 0)",
                        "format": "time_series",
                        "legendFormat": "request",
                        "legendLink": null
                     },
                     {
-                        "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"ruler\"} > 0)",
+                        "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"ruler|loki\", pod=~\"ruler|loki-backend-.*|$namespace-[0-9]*\"} > 0)",
                        "format": "time_series",
                        "legendFormat": "limit",
                        "legendLink": null
@@ -2354,7 +2354,7 @@
                  },
                  "targets": [
                     {
-                        "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/ruler\"})",
+                        "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/(.*ruler|loki-backend|loki-single-binary)\"})",
                        "format": "time_series",
                        "legendFormat": "{{pod}}",
                        "legendLink": null
--- a/charts/meta-monitoring/src/dashboards/loki-writes-resources.json
+++ b/charts/meta-monitoring/src/dashboards/loki-writes-resources.json
@@ -104,7 +104,7 @@
                  "span": 4,
                  "targets": [
                     {
-                        "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"distributor\"}[$__rate_interval]))",
+                        "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"distributor|loki\", pod=~\"distributor|loki-write-.*|$namespace-[0-9]*\"}[$__rate_interval]))",
                        "format": "time_series",
                        "legendFormat": "{{pod}}",
                        "legendLink": null
@@ -116,7 +116,7 @@
                        "legendLink": null
                     },
                     {
-                        "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"distributor\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"distributor\"})",
+                        "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"distributor|loki\", pod=~\"distributor|loki-write-.*|$namespace-[0-9]*\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"distributor|loki\", pod=~\"distributor|loki-write-.*|$namespace-[0-9]*\"})",
                        "format": "time_series",
                        "legendFormat": "limit",
                        "legendLink": null
@@ -206,7 +206,7 @@
                  "span": 4,
                  "targets": [
                     {
-                        "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"distributor\"})",
+                        "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"distributor|loki\", pod=~\"distributor|loki-write-.*|$namespace-[0-9]*\"})",
                        "format": "time_series",
                        "legendFormat": "{{pod}}",
                        "legendLink": null
@@ -218,7 +218,7 @@
                        "legendLink": null
                     },
                     {
-                        "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"distributor\"} > 0)",
+                        "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"distributor|loki\", pod=~\"distributor|loki-write-.*|$namespace-[0-9]*\"} > 0)",
                        "format": "time_series",
                        "legendFormat": "limit",
                        "legendLink": null
@@ -269,7 +269,7 @@
                  "span": 4,
                  "targets": [
                     {
-                        "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/.*distributor\"})",
+                        "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/(.*distributor|loki-write|loki-single-binary)\"})",
                        "format": "time_series",
                        "legendFormat": "{{pod}}",
                        "legendLink": null
--- a/charts/meta-monitoring/src/rules/loki-rules.yaml
+++ b/charts/meta-monitoring/src/rules/loki-rules.yaml
@@ -1,52 +1,53 @@
- name: "loki_rules"
-  rules:
-  - expr: "histogram_quantile(0.99, sum(rate(loki_request_duration_seconds_bucket[5m]))
-      by (le, cluster, job))"
-    record: "cluster_job:loki_request_duration_seconds:99quantile"
-  - expr: "histogram_quantile(0.50, sum(rate(loki_request_duration_seconds_bucket[5m]))
-      by (le, cluster, job))"
-    record: "cluster_job:loki_request_duration_seconds:50quantile"
-  - expr: "sum(rate(loki_request_duration_seconds_sum[5m])) by (cluster, job) / sum(rate(loki_request_duration_seconds_count[5m]))
-      by (cluster, job)"
-    record: "cluster_job:loki_request_duration_seconds:avg"
-  - expr: "sum(rate(loki_request_duration_seconds_bucket[5m])) by (le, cluster, job)"
-    record: "cluster_job:loki_request_duration_seconds_bucket:sum_rate"
-  - expr: "sum(rate(loki_request_duration_seconds_sum[5m])) by (cluster, job)"
-    record: "cluster_job:loki_request_duration_seconds_sum:sum_rate"
-  - expr: "sum(rate(loki_request_duration_seconds_count[5m])) by (cluster, job)"
-    record: "cluster_job:loki_request_duration_seconds_count:sum_rate"
-  - expr: "histogram_quantile(0.99, sum(rate(loki_request_duration_seconds_bucket[5m]))
-      by (le, cluster, job, route))"
-    record: "cluster_job_route:loki_request_duration_seconds:99quantile"
-  - expr: "histogram_quantile(0.50, sum(rate(loki_request_duration_seconds_bucket[5m]))
-      by (le, cluster, job, route))"
-    record: "cluster_job_route:loki_request_duration_seconds:50quantile"
-  - expr: "sum(rate(loki_request_duration_seconds_sum[5m])) by (cluster, job, route)
-      / sum(rate(loki_request_duration_seconds_count[5m])) by (cluster, job, route)"
-    record: "cluster_job_route:loki_request_duration_seconds:avg"
-  - expr: "sum(rate(loki_request_duration_seconds_bucket[5m])) by (le, cluster, job,
-      route)"
-    record: "cluster_job_route:loki_request_duration_seconds_bucket:sum_rate"
-  - expr: "sum(rate(loki_request_duration_seconds_sum[5m])) by (cluster, job, route)"
-    record: "cluster_job_route:loki_request_duration_seconds_sum:sum_rate"
-  - expr: "sum(rate(loki_request_duration_seconds_count[5m])) by (cluster, job, route)"
-    record: "cluster_job_route:loki_request_duration_seconds_count:sum_rate"
-  - expr: "histogram_quantile(0.99, sum(rate(loki_request_duration_seconds_bucket[5m]))
-      by (le, cluster, namespace, job, route))"
-    record: "cluster_namespace_job_route:loki_request_duration_seconds:99quantile"
-  - expr: "histogram_quantile(0.50, sum(rate(loki_request_duration_seconds_bucket[5m]))
-      by (le, cluster, namespace, job, route))"
-    record: "cluster_namespace_job_route:loki_request_duration_seconds:50quantile"
-  - expr: "sum(rate(loki_request_duration_seconds_sum[5m])) by (cluster, namespace,
-      job, route) / sum(rate(loki_request_duration_seconds_count[5m])) by (cluster,
-      namespace, job, route)"
-    record: "cluster_namespace_job_route:loki_request_duration_seconds:avg"
-  - expr: "sum(rate(loki_request_duration_seconds_bucket[5m])) by (le, cluster, namespace,
-      job, route)"
-    record: "cluster_namespace_job_route:loki_request_duration_seconds_bucket:sum_rate"
-  - expr: "sum(rate(loki_request_duration_seconds_sum[5m])) by (cluster, namespace,
-      job, route)"
-    record: "cluster_namespace_job_route:loki_request_duration_seconds_sum:sum_rate"
-  - expr: "sum(rate(loki_request_duration_seconds_count[5m])) by (cluster, namespace,
-      job, route)"
-    record: "cluster_namespace_job_route:loki_request_duration_seconds_count:sum_rate"
+groups:
+  - name: "loki_rules"
+    rules:
+    - expr: "histogram_quantile(0.99, sum(rate(loki_request_duration_seconds_bucket[5m]))
+        by (le, cluster, job))"
+      record: "cluster_job:loki_request_duration_seconds:99quantile"
+    - expr: "histogram_quantile(0.50, sum(rate(loki_request_duration_seconds_bucket[5m]))
+        by (le, cluster, job))"
+      record: "cluster_job:loki_request_duration_seconds:50quantile"
+    - expr: "sum(rate(loki_request_duration_seconds_sum[5m])) by (cluster, job) / sum(rate(loki_request_duration_seconds_count[5m]))
+        by (cluster, job)"
+      record: "cluster_job:loki_request_duration_seconds:avg"
+    - expr: "sum(rate(loki_request_duration_seconds_bucket[5m])) by (le, cluster, job)"
+      record: "cluster_job:loki_request_duration_seconds_bucket:sum_rate"
+    - expr: "sum(rate(loki_request_duration_seconds_sum[5m])) by (cluster, job)"
+      record: "cluster_job:loki_request_duration_seconds_sum:sum_rate"
+    - expr: "sum(rate(loki_request_duration_seconds_count[5m])) by (cluster, job)"
+      record: "cluster_job:loki_request_duration_seconds_count:sum_rate"
+    - expr: "histogram_quantile(0.99, sum(rate(loki_request_duration_seconds_bucket[5m]))
+        by (le, cluster, job, route))"
+      record: "cluster_job_route:loki_request_duration_seconds:99quantile"
+    - expr: "histogram_quantile(0.50, sum(rate(loki_request_duration_seconds_bucket[5m]))
+        by (le, cluster, job, route))"
+      record: "cluster_job_route:loki_request_duration_seconds:50quantile"
+    - expr: "sum(rate(loki_request_duration_seconds_sum[5m])) by (cluster, job, route)
+        / sum(rate(loki_request_duration_seconds_count[5m])) by (cluster, job, route)"
+      record: "cluster_job_route:loki_request_duration_seconds:avg"
+    - expr: "sum(rate(loki_request_duration_seconds_bucket[5m])) by (le, cluster, job,
+        route)"
+      record: "cluster_job_route:loki_request_duration_seconds_bucket:sum_rate"
+    - expr: "sum(rate(loki_request_duration_seconds_sum[5m])) by (cluster, job, route)"
+      record: "cluster_job_route:loki_request_duration_seconds_sum:sum_rate"
+    - expr: "sum(rate(loki_request_duration_seconds_count[5m])) by (cluster, job, route)"
+      record: "cluster_job_route:loki_request_duration_seconds_count:sum_rate"
+    - expr: "histogram_quantile(0.99, sum(rate(loki_request_duration_seconds_bucket[5m]))
+        by (le, cluster, namespace, job, route))"
+      record: "cluster_namespace_job_route:loki_request_duration_seconds:99quantile"
+    - expr: "histogram_quantile(0.50, sum(rate(loki_request_duration_seconds_bucket[5m]))
+        by (le, cluster, namespace, job, route))"
+      record: "cluster_namespace_job_route:loki_request_duration_seconds:50quantile"
+    - expr: "sum(rate(loki_request_duration_seconds_sum[5m])) by (cluster, namespace,
+        job, route) / sum(rate(loki_request_duration_seconds_count[5m])) by (cluster,
+        namespace, job, route)"
+      record: "cluster_namespace_job_route:loki_request_duration_seconds:avg"
+    - expr: "sum(rate(loki_request_duration_seconds_bucket[5m])) by (le, cluster, namespace,
+        job, route)"
+      record: "cluster_namespace_job_route:loki_request_duration_seconds_bucket:sum_rate"
+    - expr: "sum(rate(loki_request_duration_seconds_sum[5m])) by (cluster, namespace,
+        job, route)"
+      record: "cluster_namespace_job_route:loki_request_duration_seconds_sum:sum_rate"
+    - expr: "sum(rate(loki_request_duration_seconds_count[5m])) by (cluster, namespace,
+        job, route)"
+      record: "cluster_namespace_job_route:loki_request_duration_seconds_count:sum_rate"
--- a/charts/meta-monitoring/src/rules/mimir-rules.yaml
+++ b/charts/meta-monitoring/src/rules/mimir-rules.yaml
@@ -1,555 +0,0 @@
-groups:
- name: "mimir_api_1"
-  rules:
-  - expr: "histogram_quantile(0.99, sum(rate(cortex_request_duration_seconds_bucket[5m]))
-      by (le, cluster, job))"
-    record: "cluster_job:cortex_request_duration_seconds:99quantile"
-  - expr: "histogram_quantile(0.50, sum(rate(cortex_request_duration_seconds_bucket[5m]))
-      by (le, cluster, job))"
-    record: "cluster_job:cortex_request_duration_seconds:50quantile"
-  - expr: "sum(rate(cortex_request_duration_seconds_sum[5m])) by (cluster, job) / sum(rate(cortex_request_duration_seconds_count[5m]))
-      by (cluster, job)"
-    record: "cluster_job:cortex_request_duration_seconds:avg"
-  - expr: "sum(rate(cortex_request_duration_seconds_bucket[5m])) by (le, cluster, job)"
-    record: "cluster_job:cortex_request_duration_seconds_bucket:sum_rate"
-  - expr: "sum(rate(cortex_request_duration_seconds_sum[5m])) by (cluster, job)"
-    record: "cluster_job:cortex_request_duration_seconds_sum:sum_rate"
-  - expr: "sum(rate(cortex_request_duration_seconds_count[5m])) by (cluster, job)"
-    record: "cluster_job:cortex_request_duration_seconds_count:sum_rate"
- name: "mimir_api_2"
-  rules:
-  - expr: "histogram_quantile(0.99, sum(rate(cortex_request_duration_seconds_bucket[5m]))
-      by (le, cluster, job, route))"
-    record: "cluster_job_route:cortex_request_duration_seconds:99quantile"
-  - expr: "histogram_quantile(0.50, sum(rate(cortex_request_duration_seconds_bucket[5m]))
-      by (le, cluster, job, route))"
-    record: "cluster_job_route:cortex_request_duration_seconds:50quantile"
-  - expr: "sum(rate(cortex_request_duration_seconds_sum[5m])) by (cluster, job, route)
-      / sum(rate(cortex_request_duration_seconds_count[5m])) by (cluster, job, route)"
-    record: "cluster_job_route:cortex_request_duration_seconds:avg"
-  - expr: "sum(rate(cortex_request_duration_seconds_bucket[5m])) by (le, cluster, job,
-      route)"
-    record: "cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate"
-  - expr: "sum(rate(cortex_request_duration_seconds_sum[5m])) by (cluster, job, route)"
-    record: "cluster_job_route:cortex_request_duration_seconds_sum:sum_rate"
-  - expr: "sum(rate(cortex_request_duration_seconds_count[5m])) by (cluster, job, route)"
-    record: "cluster_job_route:cortex_request_duration_seconds_count:sum_rate"
- name: "mimir_api_3"
-  rules:
-  - expr: "histogram_quantile(0.99, sum(rate(cortex_request_duration_seconds_bucket[5m]))
-      by (le, cluster, namespace, job, route))"
-    record: "cluster_namespace_job_route:cortex_request_duration_seconds:99quantile"
-  - expr: "histogram_quantile(0.50, sum(rate(cortex_request_duration_seconds_bucket[5m]))
-      by (le, cluster, namespace, job, route))"
-    record: "cluster_namespace_job_route:cortex_request_duration_seconds:50quantile"
-  - expr: "sum(rate(cortex_request_duration_seconds_sum[5m])) by (cluster, namespace,
-      job, route) / sum(rate(cortex_request_duration_seconds_count[5m])) by (cluster,
-      namespace, job, route)"
-    record: "cluster_namespace_job_route:cortex_request_duration_seconds:avg"
-  - expr: "sum(rate(cortex_request_duration_seconds_bucket[5m])) by (le, cluster, namespace,
-      job, route)"
-    record: "cluster_namespace_job_route:cortex_request_duration_seconds_bucket:sum_rate"
-  - expr: "sum(rate(cortex_request_duration_seconds_sum[5m])) by (cluster, namespace,
-      job, route)"
-    record: "cluster_namespace_job_route:cortex_request_duration_seconds_sum:sum_rate"
-  - expr: "sum(rate(cortex_request_duration_seconds_count[5m])) by (cluster, namespace,
-      job, route)"
-    record: "cluster_namespace_job_route:cortex_request_duration_seconds_count:sum_rate"
- name: "mimir_querier_api"
-  rules:
-  - expr: "histogram_quantile(0.99, sum(rate(cortex_querier_request_duration_seconds_bucket[5m]))
-      by (le, cluster, job))"
-    record: "cluster_job:cortex_querier_request_duration_seconds:99quantile"
-  - expr: "histogram_quantile(0.50, sum(rate(cortex_querier_request_duration_seconds_bucket[5m]))
-      by (le, cluster, job))"
-    record: "cluster_job:cortex_querier_request_duration_seconds:50quantile"
-  - expr: "sum(rate(cortex_querier_request_duration_seconds_sum[5m])) by (cluster,
-      job) / sum(rate(cortex_querier_request_duration_seconds_count[5m])) by (cluster,
-      job)"
-    record: "cluster_job:cortex_querier_request_duration_seconds:avg"
-  - expr: "sum(rate(cortex_querier_request_duration_seconds_bucket[5m])) by (le, cluster,
-      job)"
-    record: "cluster_job:cortex_querier_request_duration_seconds_bucket:sum_rate"
-  - expr: "sum(rate(cortex_querier_request_duration_seconds_sum[5m])) by (cluster,
-      job)"
-    record: "cluster_job:cortex_querier_request_duration_seconds_sum:sum_rate"
-  - expr: "sum(rate(cortex_querier_request_duration_seconds_count[5m])) by (cluster,
-      job)"
-    record: "cluster_job:cortex_querier_request_duration_seconds_count:sum_rate"
-  - expr: "histogram_quantile(0.99, sum(rate(cortex_querier_request_duration_seconds_bucket[5m]))
-      by (le, cluster, job, route))"
-    record: "cluster_job_route:cortex_querier_request_duration_seconds:99quantile"
-  - expr: "histogram_quantile(0.50, sum(rate(cortex_querier_request_duration_seconds_bucket[5m]))
-      by (le, cluster, job, route))"
-    record: "cluster_job_route:cortex_querier_request_duration_seconds:50quantile"
-  - expr: "sum(rate(cortex_querier_request_duration_seconds_sum[5m])) by (cluster,
-      job, route) / sum(rate(cortex_querier_request_duration_seconds_count[5m])) by
-      (cluster, job, route)"
-    record: "cluster_job_route:cortex_querier_request_duration_seconds:avg"
-  - expr: "sum(rate(cortex_querier_request_duration_seconds_bucket[5m])) by (le, cluster,
-      job, route)"
-    record: "cluster_job_route:cortex_querier_request_duration_seconds_bucket:sum_rate"
-  - expr: "sum(rate(cortex_querier_request_duration_seconds_sum[5m])) by (cluster,
-      job, route)"
-    record: "cluster_job_route:cortex_querier_request_duration_seconds_sum:sum_rate"
-  - expr: "sum(rate(cortex_querier_request_duration_seconds_count[5m])) by (cluster,
-      job, route)"
-    record: "cluster_job_route:cortex_querier_request_duration_seconds_count:sum_rate"
-  - expr: "histogram_quantile(0.99, sum(rate(cortex_querier_request_duration_seconds_bucket[5m]))
-      by (le, cluster, namespace, job, route))"
-    record: "cluster_namespace_job_route:cortex_querier_request_duration_seconds:99quantile"
-  - expr: "histogram_quantile(0.50, sum(rate(cortex_querier_request_duration_seconds_bucket[5m]))
-      by (le, cluster, namespace, job, route))"
-    record: "cluster_namespace_job_route:cortex_querier_request_duration_seconds:50quantile"
-  - expr: "sum(rate(cortex_querier_request_duration_seconds_sum[5m])) by (cluster,
-      namespace, job, route) / sum(rate(cortex_querier_request_duration_seconds_count[5m]))
-      by (cluster, namespace, job, route)"
-    record: "cluster_namespace_job_route:cortex_querier_request_duration_seconds:avg"
-  - expr: "sum(rate(cortex_querier_request_duration_seconds_bucket[5m])) by (le, cluster,
-      namespace, job, route)"
-    record: "cluster_namespace_job_route:cortex_querier_request_duration_seconds_bucket:sum_rate"
-  - expr: "sum(rate(cortex_querier_request_duration_seconds_sum[5m])) by (cluster,
-      namespace, job, route)"
-    record: "cluster_namespace_job_route:cortex_querier_request_duration_seconds_sum:sum_rate"
-  - expr: "sum(rate(cortex_querier_request_duration_seconds_count[5m])) by (cluster,
-      namespace, job, route)"
-    record: "cluster_namespace_job_route:cortex_querier_request_duration_seconds_count:sum_rate"
- name: "mimir_cache"
-  rules:
-  - expr: "histogram_quantile(0.99, sum(rate(cortex_memcache_request_duration_seconds_bucket[5m]))
-      by (le, cluster, job, method))"
-    record: "cluster_job_method:cortex_memcache_request_duration_seconds:99quantile"
-  - expr: "histogram_quantile(0.50, sum(rate(cortex_memcache_request_duration_seconds_bucket[5m]))
-      by (le, cluster, job, method))"
-    record: "cluster_job_method:cortex_memcache_request_duration_seconds:50quantile"
-  - expr: "sum(rate(cortex_memcache_request_duration_seconds_sum[5m])) by (cluster,
-      job, method) / sum(rate(cortex_memcache_request_duration_seconds_count[5m]))
-      by (cluster, job, method)"
-    record: "cluster_job_method:cortex_memcache_request_duration_seconds:avg"
-  - expr: "sum(rate(cortex_memcache_request_duration_seconds_bucket[5m])) by (le, cluster,
-      job, method)"
-    record: "cluster_job_method:cortex_memcache_request_duration_seconds_bucket:sum_rate"
-  - expr: "sum(rate(cortex_memcache_request_duration_seconds_sum[5m])) by (cluster,
-      job, method)"
-    record: "cluster_job_method:cortex_memcache_request_duration_seconds_sum:sum_rate"
-  - expr: "sum(rate(cortex_memcache_request_duration_seconds_count[5m])) by (cluster,
-      job, method)"
-    record: "cluster_job_method:cortex_memcache_request_duration_seconds_count:sum_rate"
-  - expr: "histogram_quantile(0.99, sum(rate(cortex_cache_request_duration_seconds_bucket[5m]))
-      by (le, cluster, job))"
-    record: "cluster_job:cortex_cache_request_duration_seconds:99quantile"
-  - expr: "histogram_quantile(0.50, sum(rate(cortex_cache_request_duration_seconds_bucket[5m]))
-      by (le, cluster, job))"
-    record: "cluster_job:cortex_cache_request_duration_seconds:50quantile"
-  - expr: "sum(rate(cortex_cache_request_duration_seconds_sum[5m])) by (cluster, job)
-      / sum(rate(cortex_cache_request_duration_seconds_count[5m])) by (cluster, job)"
-    record: "cluster_job:cortex_cache_request_duration_seconds:avg"
-  - expr: "sum(rate(cortex_cache_request_duration_seconds_bucket[5m])) by (le, cluster,
-      job)"
-    record: "cluster_job:cortex_cache_request_duration_seconds_bucket:sum_rate"
-  - expr: "sum(rate(cortex_cache_request_duration_seconds_sum[5m])) by (cluster, job)"
-    record: "cluster_job:cortex_cache_request_duration_seconds_sum:sum_rate"
-  - expr: "sum(rate(cortex_cache_request_duration_seconds_count[5m])) by (cluster,
-      job)"
-    record: "cluster_job:cortex_cache_request_duration_seconds_count:sum_rate"
-  - expr: "histogram_quantile(0.99, sum(rate(cortex_cache_request_duration_seconds_bucket[5m]))
-      by (le, cluster, job, method))"
-    record: "cluster_job_method:cortex_cache_request_duration_seconds:99quantile"
-  - expr: "histogram_quantile(0.50, sum(rate(cortex_cache_request_duration_seconds_bucket[5m]))
-      by (le, cluster, job, method))"
-    record: "cluster_job_method:cortex_cache_request_duration_seconds:50quantile"
-  - expr: "sum(rate(cortex_cache_request_duration_seconds_sum[5m])) by (cluster, job,
-      method) / sum(rate(cortex_cache_request_duration_seconds_count[5m])) by (cluster,
-      job, method)"
-    record: "cluster_job_method:cortex_cache_request_duration_seconds:avg"
-  - expr: "sum(rate(cortex_cache_request_duration_seconds_bucket[5m])) by (le, cluster,
-      job, method)"
-    record: "cluster_job_method:cortex_cache_request_duration_seconds_bucket:sum_rate"
-  - expr: "sum(rate(cortex_cache_request_duration_seconds_sum[5m])) by (cluster, job,
-      method)"
-    record: "cluster_job_method:cortex_cache_request_duration_seconds_sum:sum_rate"
-  - expr: "sum(rate(cortex_cache_request_duration_seconds_count[5m])) by (cluster,
-      job, method)"
-    record: "cluster_job_method:cortex_cache_request_duration_seconds_count:sum_rate"
- name: "mimir_storage"
-  rules:
-  - expr: "histogram_quantile(0.99, sum(rate(cortex_kv_request_duration_seconds_bucket[5m]))
-      by (le, cluster, job))"
-    record: "cluster_job:cortex_kv_request_duration_seconds:99quantile"
-  - expr: "histogram_quantile(0.50, sum(rate(cortex_kv_request_duration_seconds_bucket[5m]))
-      by (le, cluster, job))"
-    record: "cluster_job:cortex_kv_request_duration_seconds:50quantile"
-  - expr: "sum(rate(cortex_kv_request_duration_seconds_sum[5m])) by (cluster, job)
-      / sum(rate(cortex_kv_request_duration_seconds_count[5m])) by (cluster, job)"
-    record: "cluster_job:cortex_kv_request_duration_seconds:avg"
-  - expr: "sum(rate(cortex_kv_request_duration_seconds_bucket[5m])) by (le, cluster,
-      job)"
-    record: "cluster_job:cortex_kv_request_duration_seconds_bucket:sum_rate"
-  - expr: "sum(rate(cortex_kv_request_duration_seconds_sum[5m])) by (cluster, job)"
-    record: "cluster_job:cortex_kv_request_duration_seconds_sum:sum_rate"
-  - expr: "sum(rate(cortex_kv_request_duration_seconds_count[5m])) by (cluster, job)"
-    record: "cluster_job:cortex_kv_request_duration_seconds_count:sum_rate"
- name: "mimir_queries"
-  rules:
-  - expr: "histogram_quantile(0.99, sum(rate(cortex_query_frontend_retries_bucket[5m]))
-      by (le, cluster, job))"
-    record: "cluster_job:cortex_query_frontend_retries:99quantile"
-  - expr: "histogram_quantile(0.50, sum(rate(cortex_query_frontend_retries_bucket[5m]))
-      by (le, cluster, job))"
-    record: "cluster_job:cortex_query_frontend_retries:50quantile"
-  - expr: "sum(rate(cortex_query_frontend_retries_sum[5m])) by (cluster, job) / sum(rate(cortex_query_frontend_retries_count[5m]))
-      by (cluster, job)"
-    record: "cluster_job:cortex_query_frontend_retries:avg"
-  - expr: "sum(rate(cortex_query_frontend_retries_bucket[5m])) by (le, cluster, job)"
-    record: "cluster_job:cortex_query_frontend_retries_bucket:sum_rate"
-  - expr: "sum(rate(cortex_query_frontend_retries_sum[5m])) by (cluster, job)"
-    record: "cluster_job:cortex_query_frontend_retries_sum:sum_rate"
-  - expr: "sum(rate(cortex_query_frontend_retries_count[5m])) by (cluster, job)"
-    record: "cluster_job:cortex_query_frontend_retries_count:sum_rate"
-  - expr: "histogram_quantile(0.99, sum(rate(cortex_query_frontend_queue_duration_seconds_bucket[5m]))
-      by (le, cluster, job))"
-    record: "cluster_job:cortex_query_frontend_queue_duration_seconds:99quantile"
-  - expr: "histogram_quantile(0.50, sum(rate(cortex_query_frontend_queue_duration_seconds_bucket[5m]))
-      by (le, cluster, job))"
-    record: "cluster_job:cortex_query_frontend_queue_duration_seconds:50quantile"
-  - expr: "sum(rate(cortex_query_frontend_queue_duration_seconds_sum[5m])) by (cluster,
-      job) / sum(rate(cortex_query_frontend_queue_duration_seconds_count[5m])) by
-      (cluster, job)"
-    record: "cluster_job:cortex_query_frontend_queue_duration_seconds:avg"
-  - expr: "sum(rate(cortex_query_frontend_queue_duration_seconds_bucket[5m])) by (le,
-      cluster, job)"
-    record: "cluster_job:cortex_query_frontend_queue_duration_seconds_bucket:sum_rate"
-  - expr: "sum(rate(cortex_query_frontend_queue_duration_seconds_sum[5m])) by (cluster,
-      job)"
-    record: "cluster_job:cortex_query_frontend_queue_duration_seconds_sum:sum_rate"
-  - expr: "sum(rate(cortex_query_frontend_queue_duration_seconds_count[5m])) by (cluster,
-      job)"
-    record: "cluster_job:cortex_query_frontend_queue_duration_seconds_count:sum_rate"
- name: "mimir_ingester_queries"
-  rules:
-  - expr: "histogram_quantile(0.99, sum(rate(cortex_ingester_queried_series_bucket[5m]))
-      by (le, cluster, job))"
-    record: "cluster_job:cortex_ingester_queried_series:99quantile"
-  - expr: "histogram_quantile(0.50, sum(rate(cortex_ingester_queried_series_bucket[5m]))
-      by (le, cluster, job))"
-    record: "cluster_job:cortex_ingester_queried_series:50quantile"
-  - expr: "sum(rate(cortex_ingester_queried_series_sum[5m])) by (cluster, job) / sum(rate(cortex_ingester_queried_series_count[5m]))
-      by (cluster, job)"
-    record: "cluster_job:cortex_ingester_queried_series:avg"
-  - expr: "sum(rate(cortex_ingester_queried_series_bucket[5m])) by (le, cluster, job)"
-    record: "cluster_job:cortex_ingester_queried_series_bucket:sum_rate"
-  - expr: "sum(rate(cortex_ingester_queried_series_sum[5m])) by (cluster, job)"
-    record: "cluster_job:cortex_ingester_queried_series_sum:sum_rate"
-  - expr: "sum(rate(cortex_ingester_queried_series_count[5m])) by (cluster, job)"
-    record: "cluster_job:cortex_ingester_queried_series_count:sum_rate"
-  - expr: "histogram_quantile(0.99, sum(rate(cortex_ingester_queried_samples_bucket[5m]))
-      by (le, cluster, job))"
-    record: "cluster_job:cortex_ingester_queried_samples:99quantile"
-  - expr: "histogram_quantile(0.50, sum(rate(cortex_ingester_queried_samples_bucket[5m]))
-      by (le, cluster, job))"
-    record: "cluster_job:cortex_ingester_queried_samples:50quantile"
-  - expr: "sum(rate(cortex_ingester_queried_samples_sum[5m])) by (cluster, job) / sum(rate(cortex_ingester_queried_samples_count[5m]))
-      by (cluster, job)"
-    record: "cluster_job:cortex_ingester_queried_samples:avg"
-  - expr: "sum(rate(cortex_ingester_queried_samples_bucket[5m])) by (le, cluster, job)"
-    record: "cluster_job:cortex_ingester_queried_samples_bucket:sum_rate"
-  - expr: "sum(rate(cortex_ingester_queried_samples_sum[5m])) by (cluster, job)"
-    record: "cluster_job:cortex_ingester_queried_samples_sum:sum_rate"
-  - expr: "sum(rate(cortex_ingester_queried_samples_count[5m])) by (cluster, job)"
-    record: "cluster_job:cortex_ingester_queried_samples_count:sum_rate"
-  - expr: "histogram_quantile(0.99, sum(rate(cortex_ingester_queried_exemplars_bucket[5m]))
-      by (le, cluster, job))"
-    record: "cluster_job:cortex_ingester_queried_exemplars:99quantile"
-  - expr: "histogram_quantile(0.50, sum(rate(cortex_ingester_queried_exemplars_bucket[5m]))
-      by (le, cluster, job))"
-    record: "cluster_job:cortex_ingester_queried_exemplars:50quantile"
-  - expr: "sum(rate(cortex_ingester_queried_exemplars_sum[5m])) by (cluster, job) /
-      sum(rate(cortex_ingester_queried_exemplars_count[5m])) by (cluster, job)"
-    record: "cluster_job:cortex_ingester_queried_exemplars:avg"
-  - expr: "sum(rate(cortex_ingester_queried_exemplars_bucket[5m])) by (le, cluster,
-      job)"
-    record: "cluster_job:cortex_ingester_queried_exemplars_bucket:sum_rate"
-  - expr: "sum(rate(cortex_ingester_queried_exemplars_sum[5m])) by (cluster, job)"
-    record: "cluster_job:cortex_ingester_queried_exemplars_sum:sum_rate"
-  - expr: "sum(rate(cortex_ingester_queried_exemplars_count[5m])) by (cluster, job)"
-    record: "cluster_job:cortex_ingester_queried_exemplars_count:sum_rate"
- name: "mimir_received_samples"
-  rules:
-  - expr: "sum by (cluster, namespace, job) (rate(cortex_distributor_received_samples_total[5m]))"
-    record: "cluster_namespace_job:cortex_distributor_received_samples:rate5m"
- name: "mimir_exemplars_in"
-  rules:
-  - expr: "sum by (cluster, namespace, job) (rate(cortex_distributor_exemplars_in_total[5m]))"
-    record: "cluster_namespace_job:cortex_distributor_exemplars_in:rate5m"
- name: "mimir_received_exemplars"
-  rules:
-  - expr: "sum by (cluster, namespace, job) (rate(cortex_distributor_received_exemplars_total[5m]))"
-    record: "cluster_namespace_job:cortex_distributor_received_exemplars:rate5m"
- name: "mimir_exemplars_ingested"
-  rules:
-  - expr: "sum by (cluster, namespace, job) (rate(cortex_ingester_ingested_exemplars_total[5m]))"
-    record: "cluster_namespace_job:cortex_ingester_ingested_exemplars:rate5m"
- name: "mimir_exemplars_appended"
-  rules:
-  - expr: "sum by (cluster, namespace, job) (rate(cortex_ingester_tsdb_exemplar_exemplars_appended_total[5m]))"
-    record: "cluster_namespace_job:cortex_ingester_tsdb_exemplar_exemplars_appended:rate5m"
- name: "mimir_scaling_rules"
-  rules:
-  - expr: |
-      # Convenience rule to get the number of replicas for both a deployment and a statefulset.
-      # Multi-zone deployments are grouped together removing the "zone-X" suffix.
-      sum by (cluster, namespace, deployment) (
-        label_replace(
-          kube_deployment_spec_replicas,
-          # The question mark in "(.*?)" is used to make it non-greedy, otherwise it
-          # always matches everything and the (optional) zone is not removed.
-          "deployment", "$1", "deployment", "(.*?)(?:-zone-[a-z])?"
-        )
-      )
-      or
-      sum by (cluster, namespace, deployment) (
-        label_replace(kube_statefulset_replicas, "deployment", "$1", "statefulset", "(.*?)(?:-zone-[a-z])?")
-      )
-    record: "cluster_namespace_deployment:actual_replicas:count"
-  - expr: |
-      ceil(
-        quantile_over_time(0.99,
-          sum by (cluster, namespace) (
-            cluster_namespace_job:cortex_distributor_received_samples:rate5m
-          )[24h:]
-        )
-        / 240000
-      )
-    labels:
-      deployment: "distributor"
-      reason: "sample_rate"
-    record: "cluster_namespace_deployment_reason:required_replicas:count"
-  - expr: |
-      ceil(
-        sum by (cluster, namespace) (cortex_limits_overrides{limit_name="ingestion_rate"})
-        * 0.59999999999999998 / 240000
-      )
-    labels:
-      deployment: "distributor"
-      reason: "sample_rate_limits"
-    record: "cluster_namespace_deployment_reason:required_replicas:count"
-  - expr: |
-      ceil(
-        quantile_over_time(0.99,
-          sum by (cluster, namespace) (
-            cluster_namespace_job:cortex_distributor_received_samples:rate5m
-          )[24h:]
-        )
-        * 3 / 80000
-      )
-    labels:
-      deployment: "ingester"
-      reason: "sample_rate"
-    record: "cluster_namespace_deployment_reason:required_replicas:count"
-  - expr: |
-      ceil(
-        quantile_over_time(0.99,
-          sum by(cluster, namespace) (
-            cortex_ingester_memory_series
-          )[24h:]
-        )
-        / 1500000
-      )
-    labels:
-      deployment: "ingester"
-      reason: "active_series"
-    record: "cluster_namespace_deployment_reason:required_replicas:count"
-  - expr: |
-      ceil(
-        sum by (cluster, namespace) (cortex_limits_overrides{limit_name="max_global_series_per_user"})
-        * 3 * 0.59999999999999998 / 1500000
-      )
-    labels:
-      deployment: "ingester"
-      reason: "active_series_limits"
-    record: "cluster_namespace_deployment_reason:required_replicas:count"
-  - expr: |
-      ceil(
-        sum by (cluster, namespace) (cortex_limits_overrides{limit_name="ingestion_rate"})
-        * 0.59999999999999998 / 80000
-      )
-    labels:
-      deployment: "ingester"
-      reason: "sample_rate_limits"
-    record: "cluster_namespace_deployment_reason:required_replicas:count"
-  - expr: |
-      ceil(
-        (sum by (cluster, namespace) (
-          cortex_ingester_tsdb_storage_blocks_bytes{job=~".+/ingester.*"}
-        ) / 4)
-          /
-        avg by (cluster, namespace) (
-          memcached_limit_bytes{job=~".+/memcached"}
-        )
-      )
-    labels:
-      deployment: "memcached"
-      reason: "active_series"
-    record: "cluster_namespace_deployment_reason:required_replicas:count"
-  - expr: |
-      sum by (cluster, namespace, deployment) (
-        label_replace(
-          label_replace(
-            sum by (cluster, namespace, pod)(rate(container_cpu_usage_seconds_total[5m])),
-            "deployment", "$1", "pod", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))"
-          ),
-          # The question mark in "(.*?)" is used to make it non-greedy, otherwise it
-          # always matches everything and the (optional) zone is not removed.
-          "deployment", "$1", "deployment", "(.*?)(?:-zone-[a-z])?"
-        )
-      )
-    record: "cluster_namespace_deployment:container_cpu_usage_seconds_total:sum_rate"
-  - expr: |
-      # Convenience rule to get the CPU request for both a deployment and a statefulset.
-      # Multi-zone deployments are grouped together removing the "zone-X" suffix.
-      # This recording rule is made compatible with the breaking changes introduced in kube-state-metrics v2
-      # that remove resource metrics, ref:
-      # - https://github.com/kubernetes/kube-state-metrics/blob/master/CHANGELOG.md#v200-alpha--2020-09-16
-      # - https://github.com/kubernetes/kube-state-metrics/pull/1004
-      #
-      # This is the old expression, compatible with kube-state-metrics < v2.0.0,
-      # where kube_pod_container_resource_requests_cpu_cores was removed:
-      (
-        sum by (cluster, namespace, deployment) (
-          label_replace(
-            label_replace(
-              kube_pod_container_resource_requests_cpu_cores,
-              "deployment", "$1", "pod", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))"
-            ),
-            # The question mark in "(.*?)" is used to make it non-greedy, otherwise it
-            # always matches everything and the (optional) zone is not removed.
-            "deployment", "$1", "deployment", "(.*?)(?:-zone-[a-z])?"
-          )
-        )
-      )
-      or
-      # This expression is compatible with kube-state-metrics >= v1.4.0,
-      # where kube_pod_container_resource_requests was introduced.
-      (
-        sum by (cluster, namespace, deployment) (
-          label_replace(
-            label_replace(
-              kube_pod_container_resource_requests{resource="cpu"},
-              "deployment", "$1", "pod", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))"
-            ),
-            # The question mark in "(.*?)" is used to make it non-greedy, otherwise it
-            # always matches everything and the (optional) zone is not removed.
-            "deployment", "$1", "deployment", "(.*?)(?:-zone-[a-z])?"
-          )
-        )
-      )
-    record: "cluster_namespace_deployment:kube_pod_container_resource_requests_cpu_cores:sum"
-  - expr: |
-      # Jobs should be sized to their CPU usage.
-      # We do this by comparing 99th percentile usage over the last 24hrs to
-      # their current provisioned #replicas and resource requests.
-      ceil(
-        cluster_namespace_deployment:actual_replicas:count
-          *
-        quantile_over_time(0.99, cluster_namespace_deployment:container_cpu_usage_seconds_total:sum_rate[24h])
-          /
-        cluster_namespace_deployment:kube_pod_container_resource_requests_cpu_cores:sum
-      )
-    labels:
-      reason: "cpu_usage"
-    record: "cluster_namespace_deployment_reason:required_replicas:count"
-  - expr: |
-      # Convenience rule to get the Memory utilization for both a deployment and a statefulset.
-      # Multi-zone deployments are grouped together removing the "zone-X" suffix.
-      sum by (cluster, namespace, deployment) (
-        label_replace(
-          label_replace(
-            container_memory_usage_bytes{image!=""},
-            "deployment", "$1", "pod", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))"
-          ),
-          # The question mark in "(.*?)" is used to make it non-greedy, otherwise it
-          # always matches everything and the (optional) zone is not removed.
-          "deployment", "$1", "deployment", "(.*?)(?:-zone-[a-z])?"
-        )
-      )
-    record: "cluster_namespace_deployment:container_memory_usage_bytes:sum"
-  - expr: |
-      # Convenience rule to get the Memory request for both a deployment and a statefulset.
-      # Multi-zone deployments are grouped together removing the "zone-X" suffix.
-      # This recording rule is made compatible with the breaking changes introduced in kube-state-metrics v2
-      # that remove resource metrics, ref:
-      # - https://github.com/kubernetes/kube-state-metrics/blob/master/CHANGELOG.md#v200-alpha--2020-09-16
-      # - https://github.com/kubernetes/kube-state-metrics/pull/1004
-      #
-      # This is the old expression, compatible with kube-state-metrics < v2.0.0,
-      # where kube_pod_container_resource_requests_memory_bytes was removed:
-      (
-        sum by (cluster, namespace, deployment) (
-          label_replace(
-            label_replace(
-              kube_pod_container_resource_requests_memory_bytes,
-              "deployment", "$1", "pod", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))"
-            ),
-            # The question mark in "(.*?)" is used to make it non-greedy, otherwise it
-            # always matches everything and the (optional) zone is not removed.
-            "deployment", "$1", "deployment", "(.*?)(?:-zone-[a-z])?"
-          )
-        )
-      )
-      or
-      # This expression is compatible with kube-state-metrics >= v1.4.0,
-      # where kube_pod_container_resource_requests was introduced.
-      (
-        sum by (cluster, namespace, deployment) (
-          label_replace(
-            label_replace(
-              kube_pod_container_resource_requests{resource="memory"},
-              "deployment", "$1", "pod", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))"
-            ),
-            # The question mark in "(.*?)" is used to make it non-greedy, otherwise it
-            # always matches everything and the (optional) zone is not removed.
-            "deployment", "$1", "deployment", "(.*?)(?:-zone-[a-z])?"
-          )
-        )
-      )
-    record: "cluster_namespace_deployment:kube_pod_container_resource_requests_memory_bytes:sum"
-  - expr: |
-      # Jobs should be sized to their Memory usage.
-      # We do this by comparing 99th percentile usage over the last 24hrs to
-      # their current provisioned #replicas and resource requests.
-      ceil(
-        cluster_namespace_deployment:actual_replicas:count
-          *
-        quantile_over_time(0.99, cluster_namespace_deployment:container_memory_usage_bytes:sum[24h])
-          /
-        cluster_namespace_deployment:kube_pod_container_resource_requests_memory_bytes:sum
-      )
-    labels:
-      reason: "memory_usage"
-    record: "cluster_namespace_deployment_reason:required_replicas:count"
- name: "mimir_alertmanager_rules"
-  rules:
-  - expr: "sum by (cluster, job, pod) (cortex_alertmanager_alerts)"
-    record: "cluster_job_pod:cortex_alertmanager_alerts:sum"
-  - expr: "sum by (cluster, job, pod) (cortex_alertmanager_silences)"
-    record: "cluster_job_pod:cortex_alertmanager_silences:sum"
-  - expr: "sum by (cluster, job) (rate(cortex_alertmanager_alerts_received_total[5m]))"
-    record: "cluster_job:cortex_alertmanager_alerts_received_total:rate5m"
-  - expr: "sum by (cluster, job) (rate(cortex_alertmanager_alerts_invalid_total[5m]))"
-    record: "cluster_job:cortex_alertmanager_alerts_invalid_total:rate5m"
-  - expr: "sum by (cluster, job, integration) (rate(cortex_alertmanager_notifications_total[5m]))"
-    record: "cluster_job_integration:cortex_alertmanager_notifications_total:rate5m"
-  - expr: "sum by (cluster, job, integration) (rate(cortex_alertmanager_notifications_failed_total[5m]))"
-    record: "cluster_job_integration:cortex_alertmanager_notifications_failed_total:rate5m"
-  - expr: "sum by (cluster, job) (rate(cortex_alertmanager_state_replication_total[5m]))"
-    record: "cluster_job:cortex_alertmanager_state_replication_total:rate5m"
-  - expr: "sum by (cluster, job) (rate(cortex_alertmanager_state_replication_failed_total[5m]))"
-    record: "cluster_job:cortex_alertmanager_state_replication_failed_total:rate5m"
-  - expr: "sum by (cluster, job) (rate(cortex_alertmanager_partial_state_merges_total[5m]))"
-    record: "cluster_job:cortex_alertmanager_partial_state_merges_total:rate5m"
-  - expr: "sum by (cluster, job) (rate(cortex_alertmanager_partial_state_merges_failed_total[5m]))"
-    record: "cluster_job:cortex_alertmanager_partial_state_merges_failed_total:rate5m"
- name: "mimir_ingester_rules"
-  rules:
-  - expr: "sum by(cluster, namespace, pod) (rate(cortex_ingester_ingested_samples_total[5m]))"
-    record: "cluster_namespace_pod:cortex_ingester_ingested_samples_total:rate1m"
--- a/charts/meta-monitoring/src/rules/tempo-rules.yaml
+++ b/charts/meta-monitoring/src/rules/tempo-rules.yaml
@@ -1,15 +0,0 @@
-groups:
- name: "tempo_rules"
-  rules:
-  - expr: "histogram_quantile(0.99, sum(rate(tempo_request_duration_seconds_bucket[5m])) by (le, cluster, namespace, job, route))"
-    record: "cluster_namespace_job_route:tempo_request_duration_seconds:99quantile"
-  - expr: "histogram_quantile(0.50, sum(rate(tempo_request_duration_seconds_bucket[5m])) by (le, cluster, namespace, job, route))"
-    record: "cluster_namespace_job_route:tempo_request_duration_seconds:50quantile"
-  - expr: "sum(rate(tempo_request_duration_seconds_sum[5m])) by (cluster, namespace, job, route) / sum(rate(tempo_request_duration_seconds_count[5m])) by (cluster, namespace, job, route)"
-    record: "cluster_namespace_job_route:tempo_request_duration_seconds:avg"
-  - expr: "sum(rate(tempo_request_duration_seconds_bucket[5m])) by (le, cluster, namespace, job, route)"
-    record: "cluster_namespace_job_route:tempo_request_duration_seconds_bucket:sum_rate"
-  - expr: "sum(rate(tempo_request_duration_seconds_sum[5m])) by (cluster, namespace, job, route)"
-    record: "cluster_namespace_job_route:tempo_request_duration_seconds_sum:sum_rate"
-  - expr: "sum(rate(tempo_request_duration_seconds_count[5m])) by (cluster, namespace, job, route)"
-    record: "cluster_namespace_job_route:tempo_request_duration_seconds_count:sum_rate"
--- a/charts/meta-monitoring/templates/agent/_helpers-agent.tpl
+++ b/charts/meta-monitoring/templates/agent/_helpers-agent.tpl
@@ -9,6 +9,15 @@
 {{- define "agent.all_namespaces" -}}
 {{- $list := list }}
 {{- range .Values.namespacesToMonitor }}
+{{- $list = append $list (printf "\"%s\"" .) }}
+{{- end }}
+{{- $list = append $list (printf "\"%s\"" .Release.Namespace) }}
+{{- join ", " $list }}
+{{- end }}
+
+{{- define "agent.all_namespaces_bar" -}}
+{{- $list := list }}
+{{- range .Values.namespacesToMonitor }}
 {{- $list = append $list (printf "%s" .) }}
 {{- end }}
 {{- $list = append $list .Release.Namespace }}
@@ -48,7 +57,7 @@
 {{- define "agent.tempo_write_targets" -}}
 {{- $list := list }}
 {{- if .Values.local.traces.enabled }}
-{{- $list = append $list ("otelcol.exporter.otlp.local.input") }}
+{{- $list = append $list ("otelcol.exporter.otlphttp.local.input") }}
 {{- end }}
 {{- if .Values.cloud.traces.enabled }}
 {{- $list = append $list ("otelcol.exporter.otlphttp.cloud.input") }}
--- a/charts/meta-monitoring/templates/agent/config.yaml
+++ b/charts/meta-monitoring/templates/agent/config.yaml
@@ -93,7 +93,7 @@ data:
      role = "pod"
      namespaces {
        own_namespace = true
-        names = [ {{ include "agent.namespaces" . }} ]
+        names = [ {{ include "agent.all_namespaces" . }} ]
      }
    }

@@ -135,6 +135,11 @@ data:
    }

    prometheus.relabel "filter" {
+      rule {
+        target_label = "cluster"
+        replacement = "{{- .Values.clusterLabelValue -}}"
+      }
+
      rule {
        source_labels = ["__name__"]
        regex = "({{ include "agent.all_metrics" . }})"
@@ -143,7 +148,7 @@ data:

      rule {
        source_labels = ["namespace"]
-        regex = "{{ include "agent.all_namespaces" . }}"
+        regex = "{{ include "agent.all_namespaces_bar" . }}"

        action = "keep"
      }
@@ -294,9 +299,7 @@ data:
      // We don't technically need this, but it shows how to change listen address and incoming port.
      // In this case, the Agent is listening on all available bindable addresses on port 4317 (which is the
      // default OTLP gRPC port) for the OTLP protocol.
-      grpc {
-        endpoint = "0.0.0.0:4317"
-      }
+      grpc {}

      // We define where to send the output of all ingested traces. In this case, to the OpenTelemetry batch processor
      // named 'default'.
@@ -332,7 +335,7 @@ data:
    {{- if .Values.local.logs.enabled }}
    loki.write "local" {
      endpoint {
-        url = "http://{{- .Release.Namespace -}}-loki-gateway.{{- .Release.Namespace -}}.svc.cluster.local:80/loki/api/v1/push"
+        url = "http://loki-write.{{- .Release.Namespace -}}.svc.cluster.local:3100/loki/api/v1/push"
      }
    }
    {{- end }}
@@ -345,6 +348,14 @@ data:
    }
    {{- end }}

+    {{- if .Values.local.traces.enabled }}
+    otelcol.exporter.otlphttp "local" {
+        client {
+            endpoint = "http://{{- .Release.Name -}}-tempo-distributor.{{- .Release.Namespace -}}.svc:4318"
+        }
+    }
+    {{- end }}
+
    {{- if .Values.cloud.logs.enabled }}
    loki.write "cloud" {
      endpoint {
--- a/charts/meta-monitoring/templates/grafana/agent-dashboards-1.yaml
+++ b/charts/meta-monitoring/templates/grafana/agent-dashboards-1.yaml
@@ -1,19 +0,0 @@
-{{- if and .Values.local.grafana.enabled (or .Values.dashboards.logs.enabled .Values.dashboards.metrics.enabled .Values.dashboards.traces.enabled) }}
---
-apiVersion: v1
-kind: ConfigMap
-metadata:
-  name: agent-dashboards-1
-  namespace: {{ $.Release.Namespace }}
-data:
-  "agent-logs-pipeline.json": |
-    {{ $.Files.Get "src/dashboards/agent-logs-pipeline.json" | fromJson | toJson }}
-  "agent-operational.json": |
-    {{ $.Files.Get "src/dashboards/agent-operational.json" | fromJson | toJson }}
-  "agent-remote-write.json": |
-    {{ $.Files.Get "src/dashboards/agent-remote-write.json" | fromJson | toJson }}
-  "agent-tracing-pipeline.json": |
-    {{ $.Files.Get "src/dashboards/agent-tracing-pipeline.json" | fromJson | toJson }}
-  "agent.json": |
-    {{ $.Files.Get "src/dashboards/agent.json" | fromJson | toJson }}
-{{- end }}
--- a/charts/meta-monitoring/templates/grafana/alloy-dashboards-1.yaml
+++ b/charts/meta-monitoring/templates/grafana/alloy-dashboards-1.yaml
@@ -0,0 +1,21 @@
+{{- if and .Values.local.grafana.enabled .Values.dashboards.logs.enabled }}
+---
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: alloy-dashboards-1
+  namespace: {{ $.Release.Namespace }}
+data:
+  "alloy-cluster-node.json": |
+    {{ $.Files.Get "src/dashboards/alloy-cluster-node.json" | fromJson | toJson }}
+  "alloy-cluster-overview.json": |
+    {{ $.Files.Get "src/dashboards/alloy-cluster-overview.json" | fromJson | toJson }}
+  "alloy-controller.json": |
+    {{ $.Files.Get "src/dashboards/alloy-controller.json" | fromJson | toJson }}
+  "alloy-opentelemetry.json": |
+    {{ $.Files.Get "src/dashboards/alloy-opentelemetry.json" | fromJson | toJson }}
+  "alloy-prometheus.json": |
+    {{ $.Files.Get "src/dashboards/alloy-prometheus.json" | fromJson | toJson }}
+  "alloy-resources.json": |
+    {{ $.Files.Get "src/dashboards/alloy-resources.json" | fromJson | toJson }}
+{{- end }}
--- a/charts/meta-monitoring/templates/grafana/dashboard.yaml
+++ b/charts/meta-monitoring/templates/grafana/dashboard.yaml
@@ -1,4 +1,4 @@
-{{- if and .Values.local.grafana.enabled (or .Values.dashboards.logs.enabled .Values.dashboards.metrics.enabled .Values.dashboards.traces.enabled) }}
+{{- if and .Values.local.grafana.enabled .Values.dashboards.logs.enabled }}
 ---
 apiVersion: v1
 kind: ConfigMap
@@ -30,10 +30,10 @@ data:
 {{- end }}
      - disableDeletion: true
        editable: false
-        folder: Agent
-        name: agent-1
+        folder: Alloy
+        name: alloy-1
        options:
-          path: /var/lib/grafana/dashboards/agent-1
+          path: /var/lib/grafana/dashboards/alloy-1
        orgId: 1
        type: file
 {{- end }}
--- a/charts/meta-monitoring/templates/grafana/grafana-deployment.yaml
+++ b/charts/meta-monitoring/templates/grafana/grafana-deployment.yaml
@@ -1,16 +1,4 @@
 {{- if .Values.local.grafana.enabled }}
---
-apiVersion: v1
-kind: PersistentVolumeClaim
-metadata:
-  name: grafana-pvc
-spec:
-  accessModes:
-    - ReadWriteOnce
-  resources:
-    requests:
-      storage: 1Gi
---
 apiVersion: apps/v1
 kind: Deployment
 metadata:
@@ -32,7 +20,7 @@ spec:
          - 0
      containers:
        - name: grafana
-          image: grafana/grafana:10.0.0
+          image: grafana/grafana:{{- .Values.grafana.version }}
          imagePullPolicy: IfNotPresent
          ports:
            - containerPort: 3000
@@ -65,7 +53,7 @@ spec:
              name: grafana-pv
            - mountPath: /etc/grafana/provisioning/datasources
              name: datasources-provisioning
-            {{- if or (or .Values.dashboards.logs.enabled .Values.dashboards.metrics.enabled) .Values.dashboards.traces.enabled }}
+            {{- if .Values.dashboards.logs.enabled }}
            - mountPath: /etc/grafana/provisioning/dashboards
              name: dashboards-provisioning
            {{- end }}
@@ -75,8 +63,8 @@ spec:
            - mountPath: /var/lib/grafana/dashboards/loki-2
              name: loki-dashboards-2
            {{- end }}
-            - mountPath: /var/lib/grafana/dashboards/agent-1
-              name: agent-dashboards-1
+            - mountPath: /var/lib/grafana/dashboards/alloy-1
+              name: alloy-dashboards-1
      volumes:
        - name: grafana-pv
          persistentVolumeClaim:
@@ -95,22 +83,7 @@ spec:
          configMap:
            name: loki-dashboards-2
        {{- end }}
-        - name: agent-dashboards-1
+        - name: alloy-dashboards-1
          configMap:
-            name: agent-dashboards-1
-
---
-apiVersion: v1
-kind: Service
-metadata:
-  name: grafana
-spec:
-  ports:
-    - port: 3000
-      protocol: TCP
-      targetPort: http-grafana
-  selector:
-    app: grafana
-  sessionAffinity: None
-  type: ClusterIP  # Make this configurable
+            name: alloy-dashboards-1
 {{- end }}
--- a/charts/meta-monitoring/templates/grafana/grafana-pvc.yaml
+++ b/charts/meta-monitoring/templates/grafana/grafana-pvc.yaml
@@ -0,0 +1,12 @@
+{{- if .Values.local.grafana.enabled }}
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+  name: grafana-pvc
+spec:
+  accessModes:
+    - ReadWriteOnce
+  resources:
+    requests:
+      storage: 1Gi
+{{- end }}
--- a/charts/meta-monitoring/templates/grafana/grafana-service.yaml
+++ b/charts/meta-monitoring/templates/grafana/grafana-service.yaml
@@ -0,0 +1,15 @@
+{{- if .Values.local.grafana.enabled }}
+apiVersion: v1
+kind: Service
+metadata:
+  name: grafana
+spec:
+  ports:
+    - port: 3000
+      protocol: TCP
+      targetPort: http-grafana
+  selector:
+    app: grafana
+  sessionAffinity: None
+  type: ClusterIP  # Make this configurable
+{{- end }}
--- a/charts/meta-monitoring/templates/grafana/loki-dashboards-1.yaml
+++ b/charts/meta-monitoring/templates/grafana/loki-dashboards-1.yaml
@@ -12,8 +12,6 @@ data:
    {{ $.Files.Get "src/dashboards/loki-deletion.json" | fromJson | toJson }}
  "loki-logs.json": |
    {{ $.Files.Get "src/dashboards/loki-logs.json" | fromJson | toJson }}
-  "loki-mixin-recording-rules.json": |
-    {{ $.Files.Get "src/dashboards/loki-mixin-recording-rules.json" | fromJson | toJson }}
  "loki-operational.json": |
    {{ $.Files.Get "src/dashboards/loki-operational.json" | fromJson | toJson }}
 {{- end }}
--- a/charts/meta-monitoring/templates/ruler/ruler.yaml
+++ b/charts/meta-monitoring/templates/ruler/ruler.yaml
@@ -1,5 +1,5 @@
 {{- if .Values.local.grafana.enabled }}
-{{- if and .Values.local.grafana.enabled (or .Values.dashboards.logs.enabled .Values.dashboards.metrics.enabled .Values.dashboards.traces.enabled) }}
+{{- if and .Values.local.grafana.enabled .Values.dashboards.logs.enabled }}
 apiVersion: apps/v1
 kind: Deployment
 metadata:
--- a/charts/meta-monitoring/templates/ruler/rules-configmap.yaml
+++ b/charts/meta-monitoring/templates/ruler/rules-configmap.yaml
@@ -1,5 +1,5 @@
 {{- if .Values.local.metrics.enabled }}
-{{- if and .Values.local.grafana.enabled (or .Values.dashboards.logs.enabled .Values.dashboards.metrics.enabled .Values.dashboards.traces.enabled) }}
+{{- if and .Values.local.grafana.enabled .Values.dashboards.logs.enabled }}
 ---
 apiVersion: v1
 kind: ConfigMap
@@ -10,11 +10,5 @@ data:
 {{- if .Values.dashboards.logs.enabled }}
 {{ ($.Files.Glob "src/rules/loki-rules.yaml").AsConfig | indent 2 }}
 {{- end }}
-{{- if .Values.dashboards.metrics.enabled }}
-{{ ($.Files.Glob "src/rules/mimir-rules.yaml").AsConfig | indent 2 }}
-{{- end }}
-{{- if .Values.dashboards.traces.enabled }}
-{{ ($.Files.Glob "src/rules/tempo-rules.yaml").AsConfig | indent 2 }}
-{{- end }}
 {{- end }}
 {{- end }}
--- a/charts/meta-monitoring/values.yaml
+++ b/charts/meta-monitoring/values.yaml
@@ -2,8 +2,7 @@
 namespacesToMonitor:
 - loki
 # The name of the cluster where this will be installed
-clusterLabelValue: "meta-monitoring"
-
+clusterLabelValue: "meta"
 # Set to true to write logs, metrics or traces to Grafana Cloud
 # The secrets have to be created first
 cloud:
@@ -16,7 +15,6 @@ cloud:
  traces:
    enabled: true
    secret: "traces"
-
 # Set to true for a local version of logs, metrics or traces
 local:
  grafana:
@@ -28,9 +26,9 @@ local:
  traces:
    enabled: false
  minio:
-    enabled: false  # This should be set to true if any of the previous is enabled
-
+    enabled: false # This should be set to true if any of the previous is enabled
 grafana:
+  version: 10.4.2
  # Gateway ingress configuration
  ingress:
    # -- Specifies whether an ingress for the gateway should be created
@@ -38,33 +36,34 @@ grafana:
    # -- Ingress Class Name. MAY be required for Kubernetes versions >= 1.18
    ingressClassName: ""
    # -- Annotations for the gateway ingress
-    annotations: { }
+    annotations: {}
    # -- Labels for the gateway ingress
-    labels: { }
+    labels: {}
    # -- Hosts configuration for the gateway ingress, passed through the `tpl` function to allow templating
    hosts:
      - host: monitoring.example.com
        paths:
          - path: /
            # -- pathType (e.g. ImplementationSpecific, Prefix, .. etc.) might also be required by some Ingress Controllers
-            # pathType: Prefix
+            pathType: Prefix
+            # backend:
+            #   service:
+            #     name: TODO
+            #     port:
+            #       number: TODO
    # -- TLS configuration for the gateway ingress. Hosts passed through the `tpl` function to allow templating
    #tls:
    #  - secretName: grafana-tls
    #    hosts:
    #      - monitoring.example.com
-
-
 logs:
  # Adding regexes here will add a stage.replace block for logs. For more information see
  # https://grafana.com/docs/agent/latest/flow/reference/components/loki.process/#stagereplace-block
-  piiRegexes:
-  # This example replaces the word after password with *****
-  # - expression: "password (\\\\S+)"
-  #   source: ""         # Empty uses the log message
-  #   replace: "*****""
-
-  # The lines matching these will be kept in Loki
+  piiRegexes: null # This example replaces the word after password with *****
+# - expression: "password (\\\\S+)"
+#   source: ""         # Empty uses the log message
+#   replace: "*****"
+# The lines matching these will be kept in Loki
  retain:
  # This shows the queries
  - caller=metrics.go
@@ -78,13 +77,49 @@ logs:
  # - caller=push.go
  # Additional log lines to retain
  extraLogs: []
-
 metrics:
  # The list of metrics to retain for logging dashboards
  retain:
-  - agent_config_last_load_success_timestamp_seconds
-  - agent_config_last_load_successful
-  - agent_config_load_failures_total
+  - alloy_build_info
+  - alloy_config_last_load_success_timestamp_seconds
+  - alloy_config_last_load_successful
+  - alloy_config_load_failures_total
+  - alloy_component_controller_evaluating
+  - alloy_component_dependencies_wait_seconds
+  - alloy_component_dependencies_wait_seconds_bucket
+  - alloy_component_evaluation_seconds
+  - alloy_component_evaluation_seconds_bucket
+  - alloy_component_evaluation_seconds_count
+  - alloy_component_evaluation_seconds_sum
+  - alloy_component_evaluation_slow_seconds
+  - alloy_component_controller_running_components
+  - alloy_resources_machine_rx_bytes_total
+  - alloy_resources_machine_tx_bytes_total
+  - alloy_resources_process_cpu_seconds_total
+  - alloy_resources_process_resident_memory_bytes
+  - prometheus_remote_write_wal_samples_appended_total
+  - prometheus_remote_write_wal_storage_active_series
+  - cluster_node_info
+  - cluster_node_lamport_time
+  - cluster_node_update_observers
+  - cluster_node_gossip_health_score
+  - cluster_node_gossip_proto_version
+  - cluster_node_gossip_received_events_total
+  - cluster_node_peers
+  - cluster_transport_rx_bytes_total
+  - cluster_transport_rx_packets_total
+  - cluster_transport_rx_packets_failed_total
+  - cluster_transport_stream_rx_bytes_total
+  - cluster_transport_stream_rx_packets_failed_total
+  - cluster_transport_stream_rx_packets_total
+  - cluster_transport_stream_tx_bytes_total
+  - cluster_transport_stream_tx_packets_total
+  - cluster_transport_stream_tx_packets_failed_total
+  - cluster_transport_streams
+  - cluster_transport_tx_packets_total
+  - cluster_transport_tx_packets_failed_total
+  - cluster_transport_rx_packet_queue_length
+  - cluster_transport_tx_packet_queue_length
  - container_cpu_usage_seconds_total
  - container_fs_writes_bytes_total
  - container_memory_working_set_bytes
@@ -100,7 +135,10 @@ metrics:
  - cortex_prometheus_rule_group_last_duration_seconds
  - cortex_prometheus_rule_group_last_evaluation_timestamp_seconds
  - cortex_prometheus_rule_group_iterations_missed_total
+  - exporter_send_failed_spans_ratio_total
+  - exporter_sent_spans_ratio_total
  - go_gc_duration_seconds
+  - go_gc_duration_seconds_count
  - go_goroutines
  - go_memstats_heap_inuse_bytes
  - kubelet_volume_stats_used_bytes
@@ -111,6 +149,7 @@ metrics:
  - kube_pod_container_resource_requests
  - kube_pod_container_status_last_terminated_reason
  - kube_pod_container_status_restarts_total
+  - loki_azure_blob_request_duration_seconds_bucket
  - loki_boltdb_shipper_compact_tables_operation_duration_seconds
  - loki_boltdb_shipper_compact_tables_operation_last_successful_run_timestamp_seconds
  - loki_boltdb_shipper_retention_marker_count_total
@@ -136,10 +175,15 @@ metrics:
  - loki_compactor_deleted_lines
  - loki_compactor_oldest_pending_delete_request_age_seconds
  - loki_compactor_pending_delete_requests_count
+  - loki_consul_request_duration_seconds_bucket
  - loki_discarded_samples_total
+  - loki_discarded_bytes_total
  - loki_distributor_bytes_received_total
  - loki_distributor_lines_received_total
  - loki_distributor_structured_metadata_bytes_received_total
+  - loki_gcs_request_duration_seconds_bucket
+  - loki_gcs_request_duration_seconds_count
+  - loki_index_request_duration_seconds_bucket
  - loki_index_request_duration_seconds_count
  - loki_ingester_chunk_age_seconds_bucket
  - loki_ingester_chunk_age_seconds_count
@@ -152,6 +196,7 @@ metrics:
  - loki_ingester_chunk_entries_sum
  - loki_ingester_chunk_size_bytes_bucket
  - loki_ingester_chunk_utilization_bucket
+  - loki_ingester_chunk_utilization_count
  - loki_ingester_chunk_utilization_sum
  - loki_ingester_chunks_flushed_total
  - loki_ingester_flush_queue_length
@@ -169,6 +214,8 @@ metrics:
  - loki_ruler_wal_prometheus_remote_storage_samples_total
  - loki_ruler_wal_samples_appended_total
  - loki_ruler_wal_storage_created_series_total
+  - loki_s3_request_duration_seconds_bucket
+  - loki_s3_request_duration_seconds_count
  - loki_write_batch_retries_total
  - loki_write_dropped_bytes_total
  - loki_write_dropped_entries_total
@@ -176,19 +223,64 @@ metrics:
  - loki_write_sent_entries_total
  - node_disk_read_bytes_total
  - node_disk_written_bytes_total
+  - process_start_time_seconds
+  - processor_batch_batch_send_size_ratio_bucket
+  - processor_batch_metadata_cardinality_ratio
+  - processor_batch_timeout_trigger_send_ratio_total
+  - prometheus_remote_storage_bytes_total
+  - prometheus_remote_storage_enqueue_retries_total
+  - prometheus_remote_storage_highest_timestamp_in_seconds
+  - prometheus_remote_storage_metadata_bytes_total
+  - prometheus_remote_storage_queue_highest_sent_timestamp_seconds
+  - prometheus_remote_storage_samples_dropped_total
+  - prometheus_remote_storage_samples_failed_total
+  - prometheus_remote_storage_samples_pending
+  - prometheus_remote_storage_samples_retried_total
+  - prometheus_remote_storage_samples_total
+  - prometheus_remote_storage_sent_batch_duration_seconds_bucket
+  - prometheus_remote_storage_sent_batch_duration_seconds_count
+  - prometheus_remote_storage_sent_batch_duration_seconds_sum
+  - prometheus_remote_storage_shard_capacity
+  - prometheus_remote_storage_shards
+  - prometheus_remote_storage_shards_desired
+  - prometheus_remote_storage_shards_max
+  - prometheus_remote_storage_shards_min
+  - prometheus_remote_storage_succeeded_samples_total
+  - prometheus_remote_write_wal_samples_appended_total
+  - prometheus_remote_write_wal_storage_active_series
+  - prometheus_sd_discovered_targets
+  - prometheus_target_interval_length_seconds_count
+  - prometheus_target_interval_length_seconds_sum
+  - prometheus_target_scrapes_exceeded_sample_limit_total
+  - prometheus_target_scrapes_sample_duplicate_timestamp_total
+  - prometheus_target_scrapes_sample_out_of_bounds_total
+  - prometheus_target_scrapes_sample_out_of_order_total
+  - prometheus_target_sync_length_seconds_sum
+  - prometheus_wal_watcher_current_segment
  - promtail_custom_bad_words_total
+  - promtail_dropped_bytes_total
+  - promtail_files_active_total
+  - promtail_read_bytes_total
+  - promtail_read_lines_total
+  - promtail_request_duration_seconds_bucket
+  - promtail_sent_entries_total
+  - rpc_server_duration_milliseconds_bucket
+  - receiver_accepted_spans_ratio_total
+  - receiver_refused_spans_ratio_total
+  - scrape_duration_seconds
+  - traces_exporter_sent_spans
+  - traces_exporter_send_failed_spans
+  - traces_loadbalancer_backend_outcome
+  - traces_loadbalancer_num_backends
+  - traces_receiver_accepted_spans
+  - traces_receiver_refused_spans
+  - up
  # Additional metrics to retain
  extraMetrics: []
-
 # Set enabled = true to add the default logs dashboards to the local Grafana
 dashboards:
  logs:
    enabled: true
-  metrics:
-    enabled: true
-  traces:
-    enabled: true
-
 kubeStateMetrics:
  # Scrape https://github.com/kubernetes/kube-state-metrics by default
  enabled: true
@@ -196,10 +288,8 @@ kubeStateMetrics:
  # https://artifacthub.io/packages/helm/prometheus-community/kube-state-metrics/
  # is used. Change this if kube-state-metrics is installed somewhere else.
  endpoint: kube-state-metrics.kube-state-metrics.svc.cluster.local:8080
-
 # The following are configuration for the dependencies.
 # These should usually not be changed.
-
 loki:
  loki:
    auth_enabled: false
@@ -267,7 +357,6 @@ loki:
    extraEnvFrom:
    - secretRef:
        name: "minio"
-
 alloy:
  alloy:
    clustering:
@@ -299,7 +388,6 @@ alloy:
      maxReplicas: 30
      targetMemoryUtilizationPercentage: 90
      targetCPUUtilizationPercentage: 90
-
 mimir-distributed:
  minio:
    enabled: false
@@ -330,7 +418,6 @@ mimir-distributed:
            insecure: true
      limits:
        compactor_blocks_retention_period: 30d
-
 tempo-distributed:
  tempo:
    structuredConfig:
@@ -379,7 +466,6 @@ tempo-distributed:
        enabled: true
      grpc:
        enabled: true
-
 minio:
  existingSecret: "minio"
  buckets:
--- a/docs/dev_update_dependencies.md
+++ b/docs/dev_update_dependencies.md
@@ -1,8 +1,12 @@
 # Update the dependencies

-The dependencies are the version of Loki, Mimir, Agent and so on that are included in this chart.
+The dependencies are the versions of Loki, Mimir, Agent and so on that are included in this chart.
 The current versions can be found in the [Chart.yaml](../charts/meta-monitoring/Chart.yaml) file.

+A GitHub Action runs daily to check whether updated versions are available. If so, a PR will be created.
+
+The manual steps are as follows:
+
 Run this in the charts/meta-monitoring directory after updating a dependency:

 ```
--- a/docs/installation.md
+++ b/docs/installation.md
@@ -4,7 +4,7 @@

 1. Use an existing Grafana Cloud account or setup a new one. Then create an access token:

-   1. In Grafana go to Administration -> Users and Access -> Cloud access policies.
+   1. In a Grafana instance on Grafana Cloud go to Administration -> Users and Access -> Cloud access policies.

   1. Click `Create access policy`.

@@ -39,7 +39,7 @@
    --from-literal=endpoint='https://otlp-gateway-prod-us-east-0.grafana.net/otlp'
   ```

-   The logs, metrics and traces usernames are the `User / Username / Instance IDs` of the Loki, Prometheus/Mimir and OpenTelemetry instances in Grafana Cloud. From `Home` in Grafana click on `Stacks`. Then go to the `Details` pages of Loki and Prometheus/Mimir. For OpenTelemetry go to the `Configure` page.
+   The logs, metrics and traces usernames are the `User / Username / Instance IDs` of the Loki, Prometheus/Mimir and OpenTelemetry instances in Grafana Cloud. From `Home` in Grafana click on `Stacks`. Then go to the `Details` pages of Loki and Prometheus/Mimir. For OpenTelemetry go to the `Configure` page. The endpoints will also have to be changed to match your settings.

 1. Create a values.yaml file based on the [default one](../charts/meta-monitoring/values.yaml). Fill in the names of the secrets created above as needed. An example minimal values.yaml looks like this:

@@ -102,7 +102,7 @@
       enabled: true
   ```

-## Installing the chart
+## Installing, updating and deleting the chart

 1. Add the repo

@@ -175,7 +175,7 @@ For each of the dashboard files in charts/meta-monitoring/src/dashboards folder

 ## Configure Loki to send traces

-1. In the Loki config enable tracing:
+1. In the config of the Loki instance that is being monitored, enable tracing:

   ```
   loki:
@@ -187,7 +187,15 @@ For each of the dashboard files in charts/meta-monitoring/src/dashboards folder

   1. JAEGER_ENDPOINT: http address of the mmc-alloy service installed by the meta-monitoring chart, for example "http://mmc-alloy:14268/api/traces"
   1. JAEGER_AGENT_TAGS: extra tags you would like to add to the spans, for example  'cluster="abc",namespace="def"'
-   1. JAEGER_SAMPLER_TYPE: the sampling strategy, for example to sample all use 'const' with a value of 1 for the next environment variable
-   1. JAEGER_SAMPLER_PARAM: 1
+   1. JAEGER_SAMPLER_TYPE: the sampling strategy. We suggest setting this to `ratelimiting` so at most 1 trace is accepted per second. See these [docs](https://www.jaegertracing.io/docs/1.57/sampling/) for more options.
+   1. JAEGER_SAMPLER_PARAM: 1.0

 1. If Loki is installed in a different namespace you can create an [ExternalName service](https://kubernetes.io/docs/concepts/services-networking/service/#externalname) in Kubernetes to point to the mmc-alloy service in the meta monitoring namespace
+
+## Configure external access using an Ingress in local mode
+
+When using local mode, a Kubernetes [Ingress](https://kubernetes.io/docs/concepts/services-networking/ingress/) object is created by default to access the Grafana instance. This will need to be adapted to your cloud provider by updating the `grafana.ingress` section of the `values.yaml` file provided to Helm. Check the documentation of your cloud provider for available options.
+
+## Kube-state-metrics
+
+Metrics about Kubernetes objects are scraped from [kube-state-metrics](https://github.com/kubernetes/kube-state-metrics). This needs to be installed in the cluster. The `kubeStateMetrics.endpoint` entry in values.yaml should be set to its address (without the `/metrics` part in the URL).
Author	SHA1	Message	Date
Michel Hollands	4e8b2be044	Update the README and docs Signed-off-by: Michel Hollands <michel.hollands@gmail.com>	2024-05-14 14:06:49 +01:00
Michel Hollands	df12d96f9c	Merge pull request #123 from grafana/cleanup_ci Comment out installation in CI for now	2024-05-14 11:51:24 +01:00
Michel Hollands	fcb5de6793	Comment out installation in CI for now Signed-off-by: Michel Hollands <michel.hollands@gmail.com>	2024-05-14 11:50:57 +01:00
Michel Hollands	661662caec	Merge pull request #121 from grafana/add_ci_install Add CI install step	2024-05-14 10:52:01 +01:00
Michel Hollands	2a681ce1eb	Add workflow dispatch Signed-off-by: Michel Hollands <michel.hollands@gmail.com>	2024-05-14 10:50:12 +01:00
Michel Hollands	52e4516e04	Add CI install step Signed-off-by: Michel Hollands <michel.hollands@gmail.com>	2024-05-14 10:48:45 +01:00
Michel Hollands	95085c4e72	Merge pull request #114 from grafana/chore/update-dependencies [dependency] Update the subcharts	2024-05-14 10:38:16 +01:00
Michel Hollands	55d3c9d723	Merge pull request #120 from grafana/add_ksm_docs Add kube-state-metrics documentation	2024-05-14 10:31:47 +01:00
Michel Hollands	618ab3778b	Add kube-state-metrics documentation Signed-off-by: Michel Hollands <michel.hollands@gmail.com>	2024-05-14 10:31:11 +01:00
Michel Hollands	89d9bdb5e2	Merge pull request #119 from grafana/fix_cluster_name Use shorter name for cluster	2024-05-14 08:47:56 +01:00
Michel Hollands	291f680c16	Use shorter name for cluster Signed-off-by: Michel Hollands <michel.hollands@gmail.com>	2024-05-14 08:46:45 +01:00
MichelHollands	3658769c7a	Update dependencies	2024-05-14 07:04:01 +00:00
Michel Hollands	1be9bc8d0a	Merge pull request #118 from grafana/fix_dashboards Fix dashboards a bit more	2024-05-13 17:04:45 +01:00
Michel Hollands	81d63a4383	Fix CPU usage of ssd querier Signed-off-by: Michel Hollands <michel.hollands@gmail.com>	2024-05-13 16:59:05 +01:00
Michel Hollands	333ba3a3fd	Add cluster to kube state metrics Signed-off-by: Michel Hollands <michel.hollands@gmail.com>	2024-05-13 16:58:07 +01:00
Michel Hollands	7aa091cbf8	Merge pull request #117 from grafana/fix_dashboards Fix dashboards	2024-05-13 14:48:45 +01:00
Michel Hollands	d309a5bc50	Fix mistakes Signed-off-by: Michel Hollands <michel.hollands@gmail.com>	2024-05-13 14:44:08 +01:00
Michel Hollands	346dd4968e	Make reads-resources work for all 3 deployment modes Signed-off-by: Michel Hollands <michel.hollands@gmail.com>	2024-05-13 14:36:53 +01:00
Michel Hollands	f5c9fa0593	Update operation so it works with all types of deployment Signed-off-by: Michel Hollands <michel.hollands@gmail.com>	2024-05-13 14:07:59 +01:00
Michel Hollands	d5e8df856d	Update writes dashboard work with all types Signed-off-by: Michel Hollands <michel.hollands@gmail.com>	2024-05-13 14:06:41 +01:00
Michel Hollands	2d85e7e120	Update dashboards so they work with single binary Signed-off-by: Michel Hollands <michel.hollands@gmail.com>	2024-05-13 10:56:35 +01:00
Michel Hollands	1a4a1ad885	Fix ruler panel Signed-off-by: Michel Hollands <michel.hollands@gmail.com>	2024-05-13 10:17:55 +01:00
Michel Hollands	c1ff364c29	Add missing metric in reads dashboard Signed-off-by: Michel Hollands <michel.hollands@gmail.com>	2024-05-13 09:45:37 +01:00
Michel Hollands	bd0ef0e2cc	Add missing values Signed-off-by: Michel Hollands <michel.hollands@gmail.com>	2024-05-13 09:21:05 +01:00
Michel Hollands	0216163885	Add chunk reason Signed-off-by: Michel Hollands <michel.hollands@gmail.com>	2024-05-13 09:11:20 +01:00
Michel Hollands	c42718649f	Fix distributor memory panel Signed-off-by: Michel Hollands <michel.hollands@gmail.com>	2024-05-13 09:03:02 +01:00
Michel Hollands	650df8217a	Merge pull request #116 from grafana/fix_loki_write_endpoint Fix local write end point	2024-05-13 08:24:18 +01:00
Michel Hollands	f7946ff713	Fix local write end point Signed-off-by: Michel Hollands <michel.hollands@gmail.com>	2024-05-12 14:32:39 +01:00
Michel Hollands	b312fc37fc	Merge pull request #115 from grafana/fix_traces_forwarding Fix local tracing pipeline	2024-05-10 15:44:03 +01:00
Michel Hollands	ad96f09600	Fix tracing pipeline Signed-off-by: Michel Hollands <michel.hollands@gmail.com>	2024-05-10 15:36:05 +01:00
Michel Hollands	090f1ef91a	Merge pull request #113 from grafana/change_default_sampling_type Suggest ratelimiting sample rate for Loki traces	2024-05-09 17:10:24 +01:00
Michel Hollands	b2957d90f0	Merge pull request #112 from grafana/update_ingress_documentation Add docs regarding the Ingress	2024-05-09 17:10:06 +01:00
Michel Hollands	f8aea814c5	Suggest ratelimiting sample rate for Loki traces Signed-off-by: Michel Hollands <michel.hollands@gmail.com>	2024-05-09 16:46:43 +01:00
Michel Hollands	91c19f07d3	Set default value for host again Signed-off-by: Michel Hollands <michel.hollands@gmail.com>	2024-05-09 16:35:46 +01:00
Michel Hollands	315b203082	Reference cloud provider docs Signed-off-by: Michel Hollands <michel.hollands@gmail.com>	2024-05-09 16:34:12 +01:00
Michel Hollands	caf4eda1be	Merge pull request #111 from grafana/create_new_version Update version	2024-05-09 09:40:54 +01:00
Michel Hollands	21ba3ebe8c	Update version Signed-off-by: Michel Hollands <michel.hollands@gmail.com>	2024-05-09 09:35:10 +01:00
Michel Hollands	f0a934a393	Merge pull request #109 from grafana/add_more_metrics Add the Alloy dashboards instead of the Agent ones	2024-05-09 09:26:13 +01:00
Michel Hollands	941420b417	Merge pull request #110 from grafana/chore/update-dependencies [dependency] Update the subcharts	2024-05-09 09:25:27 +01:00
MichelHollands	1ea10cdbfa	Update dependencies	2024-05-09 07:03:08 +00:00
Michel Hollands	b99d816057	Add Alloy dashboards and metrics Signed-off-by: Michel Hollands <michel.hollands@gmail.com>	2024-05-08 15:59:22 +01:00
Michel Hollands	f89a6816a8	Scrape more metrics from more places Signed-off-by: Michel Hollands <michel.hollands@gmail.com>	2024-05-08 13:06:03 +01:00
Michel Hollands	890137e7b3	Merge pull request #108 from grafana/fix_rules Add groups to loki-rules so they are parsed correctly	2024-05-08 11:08:42 +01:00
Michel Hollands	75395ba196	Add groups to loki-rules so they are parsed correctly Signed-off-by: Michel Hollands <michel.hollands@gmail.com>	2024-05-08 11:08:20 +01:00
Michel Hollands	7e3145e2eb	Merge pull request #107 from grafana/remove_rules_files_mimir_tempo Remove unused ruler files	2024-05-08 10:48:27 +01:00
Michel Hollands	232777d71a	Remove unused ruler files Signed-off-by: Michel Hollands <michel.hollands@gmail.com>	2024-05-08 10:47:52 +01:00
Michel Hollands	d9a4d4a964	Merge pull request #106 from grafana/split_up_grafana_template Split up Grafana yaml	2024-05-08 10:39:29 +01:00
Michel Hollands	57adbf43e2	Split up Grafana yaml Signed-off-by: Michel Hollands <michel.hollands@gmail.com>	2024-05-08 10:39:02 +01:00
Michel Hollands	add43ae974	Merge pull request #105 from grafana/remove_redundant_variables Remove unused variables	2024-05-08 10:25:37 +01:00
Michel Hollands	52ec526718	Remove unused variables Signed-off-by: Michel Hollands <michel.hollands@gmail.com>	2024-05-08 10:24:48 +01:00
Michel Hollands	8a5ed559a2	Merge pull request #104 from grafana/fix_dependency_check Fix name and indentation of workflow	2024-05-08 09:49:07 +01:00
Michel Hollands	188cd7e56f	Fix name and indentation of workflow Signed-off-by: Michel Hollands <michel.hollands@gmail.com>	2024-05-08 09:46:42 +01:00
Michel Hollands	9e4dbcd44a	Merge pull request #100 from grafana/combine_ci Combine dependency updates	2024-05-08 09:40:07 +01:00
Michel Hollands	28daa27fca	Merge pull request #99 from grafana/chore/update-minio [dependency] Update the Grafana version	2024-05-08 09:38:26 +01:00
Michel Hollands	2de595baf4	Merge branch 'main' into chore/update-minio	2024-05-08 09:37:45 +01:00
Michel Hollands	95257b66d3	Merge pull request #103 from grafana/chore/update-tempo-distributed [dependency] Update the Tempo Distributed subchart	2024-05-08 09:36:02 +01:00
Michel Hollands	e9b0e57ef0	Merge pull request #95 from grafana/update_grafana Add CI action to update Grafana version	2024-05-08 09:35:29 +01:00
Michel Hollands	03609ebb35	Merge pull request #102 from grafana/fix_alloy_config_for_traces Fix the alloy config	2024-05-08 09:34:53 +01:00
MichelHollands	7e38d19814	Update Tempo Distributed	2024-05-08 07:03:26 +00:00
Michel Hollands	32272298d7	Fix the alloy config Signed-off-by: Michel Hollands <michel.hollands@gmail.com>	2024-05-07 16:35:00 +01:00
Michel Hollands	3879207e05	Merge pull request #101 from grafana/fix_minio_secret_name Fix secret name	2024-05-07 14:40:52 +01:00
Michel Hollands	c6d0444dfa	Combine dependency updates Signed-off-by: Michel Hollands <michel.hollands@gmail.com>	2024-05-07 11:26:32 +01:00
MichelHollands	d938dbbfe5	Update Grafana version	2024-05-07 09:22:19 +00:00
Michel Hollands	e9125d1a9c	Add corrected key Signed-off-by: Michel Hollands <michel.hollands@gmail.com>	2024-05-07 10:21:42 +01:00
Michel Hollands	076685ef06	Revert key Signed-off-by: Michel Hollands <michel.hollands@gmail.com>	2024-05-07 10:18:55 +01:00
Michel Hollands	b0451d626e	Use $. in yaml key Signed-off-by: Michel Hollands <michel.hollands@gmail.com>	2024-05-07 10:16:10 +01:00
Michel Hollands	90e949e89a	Change version param Signed-off-by: Michel Hollands <michel.hollands@gmail.com>	2024-05-07 10:14:50 +01:00
Michel Hollands	06e176e720	Trim the v prefix from the released version Signed-off-by: Michel Hollands <michel.hollands@gmail.com>	2024-05-07 10:11:17 +01:00
Michel Hollands	d4c886ba9d	Use token from env Signed-off-by: Michel Hollands <michel.hollands@gmail.com>	2024-05-07 10:00:55 +01:00
Michel Hollands	643e73f5f1	add token Signed-off-by: Michel Hollands <michel.hollands@gmail.com>	2024-05-07 09:54:50 +01:00
Michel Hollands	7e65f3d9c9	Fix sourceid Signed-off-by: Michel Hollands <michel.hollands@gmail.com>	2024-05-07 09:46:31 +01:00
Michel Hollands	26e0ad0b85	Add CI action to update Grafana version Signed-off-by: Michel Hollands <michel.hollands@gmail.com>	2024-05-07 09:20:51 +01:00