Compare commits

..

No commits in common. "main" and "add_tempo_dashboards.yaml" have entirely different histories.

106 changed files with 63548 additions and 17045 deletions

View File

@ -1,19 +0,0 @@
apiVersion: kind.x-k8s.io/v1alpha4
kind: Cluster
nodes:
- role: control-plane
kubeadmConfigPatches:
- |
kind: ClusterConfiguration
controllerManager:
extraArgs:
bind-address: 0.0.0.0
secure-port: "10257"
scheduler:
extraArgs:
bind-address: 0.0.0.0
secure-port: "10259"
- |
kind: KubeProxyConfiguration
metricsBindAddress: 0.0.0.0:10249
- role: worker

View File

@ -1,3 +0,0 @@
owner: grafana
git-repo: helm-charts
skip-existing: true

View File

@ -1,15 +0,0 @@
## Reference: https://github.com/helm/chart-testing/blob/master/doc/ct_lint-and-install.md
remote: origin
target-branch: main
chart-dirs:
- charts
chart-repos:
- grafana=https://grafana.github.io/helm-charts
- minio=https://charts.min.io
validate-chart-schema: true
validate-maintainers: true
validate-yaml: true
exclude-deprecated: true
excluded-charts: []
namespace: meta-monitoring # Need to set the namespace because we create the secret there
release-label: app.kubernetes.io/instance

View File

@ -1,30 +0,0 @@
name: Bump dependency "alloy" for Helm chart "meta-monitoring"
sources:
alloy:
name: Get latest "alloy" Helm chart version
kind: helmchart
spec:
name: alloy
url: https://grafana.github.io/helm-charts
versionfilter:
kind: semver
pattern: '*'
conditions:
alloy:
name: Ensure Helm chart dependency "alloy" is specified
kind: yaml
spec:
file: charts/meta-monitoring/Chart.yaml
key: $.dependencies[1].name
value: alloy
disablesourceinput: true
targets:
alloy:
name: Bump Helm chart dependency "alloy" for Helm chart "meta-monitoring"
kind: helmchart
spec:
file: Chart.yaml
key: $.dependencies[1].version
name: charts/meta-monitoring
versionincrement: none
sourceid: alloy

View File

@ -1,30 +0,0 @@
name: Bump grafana version specified in the values.yaml
sources:
latestGrafanaRelease:
name: Get latest grafana release on Github
kind: githubrelease
spec:
owner: grafana
repository: grafana
token: '{{ requiredEnv "UPDATECLI_GITHUB_TOKEN" }}'
versionfilter:
kind: latest
transformers:
- trimprefix: "v"
conditions:
grafanaImagePublished:
name: Ensure the latest Grafana is published on DockerHub
kind: dockerimage
source-id: latestGrafanaRelease
spec:
image: "grafana/grafana"
targets:
grafana:
name: Update Grafana version in values.yaml
kind: helmchart
spec:
file: values.yaml
key: $.grafana.version
name: charts/meta-monitoring
versionincrement: none
sourceid: latestGrafanaRelease

View File

@ -1,30 +0,0 @@
name: Bump dependency "loki" for Helm chart "meta-monitoring"
sources:
loki:
name: Get latest "loki" Helm chart version
kind: helmchart
spec:
name: loki
url: https://grafana.github.io/helm-charts
versionfilter:
kind: semver
pattern: '*'
conditions:
loki:
name: Ensure Helm chart dependency "loki" is specified
kind: yaml
spec:
file: charts/meta-monitoring/Chart.yaml
key: $.dependencies[0].name
value: loki
disablesourceinput: true
targets:
loki:
name: Bump Helm chart dependency "loki" for Helm chart "meta-monitoring"
kind: helmchart
spec:
file: Chart.yaml
key: $.dependencies[0].version
name: charts/meta-monitoring
versionincrement: none
sourceid: loki

View File

@ -1,30 +0,0 @@
name: Bump dependency "mimir-distributed" for Helm chart "meta-monitoring"
sources:
mimir-distributed:
name: Get latest "mimir-distributed" Helm chart version
kind: helmchart
spec:
name: mimir-distributed
url: https://grafana.github.io/helm-charts
versionfilter:
kind: semver
pattern: '*'
conditions:
mimir-distributed:
name: Ensure Helm chart dependency "mimir-distributed" is specified
kind: yaml
spec:
file: charts/meta-monitoring/Chart.yaml
key: $.dependencies[2].name
value: mimir-distributed
disablesourceinput: true
targets:
mimir-distributed:
name: Bump Helm chart dependency "mimir-distributed" for Helm chart "meta-monitoring"
kind: helmchart
spec:
file: Chart.yaml
key: $.dependencies[2].version
name: charts/meta-monitoring
versionincrement: none
sourceid: mimir-distributed

View File

@ -1,30 +0,0 @@
name: Bump dependency "minio" for Helm chart "meta-monitoring"
sources:
minio:
name: Get latest "minio" Helm chart version
kind: helmchart
spec:
name: minio
url: https://charts.min.io
versionfilter:
kind: semver
pattern: '*'
conditions:
minio:
name: Ensure Helm chart dependency "minio" is specified
kind: yaml
spec:
file: charts/meta-monitoring/Chart.yaml
key: $.dependencies[4].name
value: minio
disablesourceinput: true
targets:
minio:
name: Bump Helm chart dependency "minio" for Helm chart "meta-monitoring"
kind: helmchart
spec:
file: Chart.yaml
key: $.dependencies[4].version
name: charts/meta-monitoring
versionincrement: none
sourceid: minio

View File

@ -1,30 +0,0 @@
name: Bump dependency "tempo-distributed" for Helm chart "meta-monitoring"
sources:
tempo-distributed:
name: Get latest "tempo-distributed" Helm chart version
kind: helmchart
spec:
name: tempo-distributed
url: https://grafana.github.io/helm-charts
versionfilter:
kind: semver
pattern: '*'
conditions:
tempo-distributed:
name: Ensure Helm chart dependency "tempo-distributed" is specified
kind: yaml
spec:
file: charts/meta-monitoring/Chart.yaml
key: $.dependencies[3].name
value: tempo-distributed
disablesourceinput: true
targets:
tempo-distributed:
name: Bump Helm chart dependency "tempo-distributed" for Helm chart "meta-monitoring"
kind: helmchart
spec:
file: Chart.yaml
key: $.dependencies[3].version
name: charts/meta-monitoring
versionincrement: none
sourceid: tempo-distributed

View File

@ -1,151 +0,0 @@
---
name: Check for dependency updates
on:
workflow_dispatch:
schedule:
# Run once a day
- cron: '0 7 * * *'
permissions:
contents: "write"
pull-requests: "write"
env:
UPDATECLI_CONFIG_DIR: "${{ github.workspace }}/.github/configs/updatecli.d"
UPDATECLI_GITHUB_TOKEN: "${{ secrets.GITHUB_TOKEN }}"
jobs:
updateVersions:
name: Update the subcharts
runs-on: "ubuntu-latest"
permissions:
contents: write
id-token: write
steps:
- name: Checkout
uses: actions/checkout@v2
- name: Install Updatecli
uses: updatecli/updatecli-action@v2
- name: Run Updatecli for Loki
id: update-loki
run: |
updatecli apply --config ${UPDATECLI_CONFIG_DIR}/loki.yaml
if ! git diff --exit-code > /dev/null; then
echo "changed=true" >> "${GITHUB_OUTPUT}"
fi
- name: Run Updatecli for Alloy
id: update-grafana-alloy
run: |
updatecli apply --config ${UPDATECLI_CONFIG_DIR}/alloy.yaml
if ! git diff --exit-code > /dev/null; then
echo "changed=true" >> "${GITHUB_OUTPUT}"
fi
- name: Run Updatecli for Mimir
id: update-mimir-distributed
run: |
updatecli apply --config ${UPDATECLI_CONFIG_DIR}/mimir-distributed.yaml
if ! git diff --exit-code > /dev/null; then
echo "changed=true" >> "${GITHUB_OUTPUT}"
fi
- name: Run Updatecli for Tempo
id: update-tempo-distributed
run: |
updatecli apply --config ${UPDATECLI_CONFIG_DIR}/tempo-distributed.yaml
if ! git diff --exit-code > /dev/null; then
echo "changed=true" >> "${GITHUB_OUTPUT}"
fi
- name: Run Updatecli for Minio
id: update-minio
run: |
updatecli apply --config ${UPDATECLI_CONFIG_DIR}/minio.yaml
if ! git diff --exit-code > /dev/null; then
echo "changed=true" >> "${GITHUB_OUTPUT}"
fi
- id: get-secrets
uses: grafana/shared-workflows/actions/get-vault-secrets@main
with:
# Secrets placed in the ci/repo/grafana/<repo>/<path> path in Vault
repo_secrets: |
APP_ID=github-app:app-id
PRIVATE_KEY=github-app:private-key
- uses: actions/create-github-app-token@v1
id: app-token
with:
app-id: ${{ env.APP_ID }}
private-key: ${{ env.PRIVATE_KEY }}
- name: Create pull request
if: steps.update-loki.outputs.changed == 'true' || steps.update-grafana-alloy.outputs.changed == 'true' || steps.update-mimir-distributed.outputs.changed == 'true' || steps.update-tempo-distributed.outputs.changed == 'true' || steps.update-minio.outputs.changed == 'true'
uses: peter-evans/create-pull-request@v5
with:
title: "[dependency] Update the subcharts"
body: "Updates the subcharts"
base: main
author: "${{ github.actor }} <${{ github.actor }}@users.noreply.github.com>"
committer: "GitHub <noreply@github.com>"
commit-message: Update dependencies
labels: dependencies
branch: chore/update-dependencies
delete-branch: true
team-reviewers: "@grafana/loki-squad"
token: ${{ steps.app-token.outputs.token }}
updateGrafana:
name: Update the Grafana version
runs-on: "ubuntu-latest"
permissions:
contents: write
id-token: write
steps:
- name: Checkout
uses: actions/checkout@v2
- name: Install Updatecli
uses: updatecli/updatecli-action@v2
- name: Run Updatecli
id: update-grafana
run: |
updatecli apply --config ${UPDATECLI_CONFIG_DIR}/grafana.yaml
if ! git diff --exit-code > /dev/null; then
echo "changed=true" >> "${GITHUB_OUTPUT}"
fi
- id: get-secrets
uses: grafana/shared-workflows/actions/get-vault-secrets@main
with:
# Secrets placed in the ci/repo/grafana/<repo>/<path> path in Vault
repo_secrets: |
APP_ID=github-app:app-id
PRIVATE_KEY=github-app:private-key
- uses: actions/create-github-app-token@v1
id: app-token
with:
app-id: ${{ env.APP_ID }}
private-key: ${{ env.PRIVATE_KEY }}
- name: Create pull request
if: steps.update-grafana.outputs.changed == 'true'
uses: peter-evans/create-pull-request@v5
with:
title: "[dependency] Update the Grafana version"
body: "Updates the Grafana version"
base: main
author: "${{ github.actor }} <${{ github.actor }}@users.noreply.github.com>"
committer: "GitHub <noreply@github.com>"
commit-message: Update Grafana version
labels: dependencies
branch: chore/update-minio
delete-branch: true
team-reviewers: "@grafana/loki-squad"
token: ${{ steps.app-token.outputs.token }}

View File

@ -1,67 +0,0 @@
---
name: helm-ci
on:
workflow_dispatch:
pull_request:
paths:
- "charts/meta-monitoring/**"
env:
CT_CONFIGFILE: charts/meta-monitoring/ct.yaml
jobs:
call-lint:
name: Lint Helm Chart
runs-on: ubuntu-latest
steps:
- name: Checkout Code
uses: actions/checkout@v3
- name: Lint Yaml
run: make helm-lint
# call-test:
# name: Test Helm Chart
# runs-on: ubuntu-latest
# steps:
# - name: Checkout
# uses: actions/checkout@v4
# with:
# fetch-depth: 0
# - name: Set up Helm
# uses: azure/setup-helm@v3
# with:
# version: v3.8.2
# # Python is required because `ct lint` runs Yamale (https://github.com/23andMe/Yamale) and
# # yamllint (https://github.com/adrienverge/yamllint) which require Python
# - name: Set up Python
# uses: actions/setup-python@v4
# with:
# python-version: 3.9
# - name: Set up chart-testing
# uses: helm/chart-testing-action@v2
# - name: Run chart-testing (list-changed)
# id: list-changed
# run: |
# changed=$(ct list-changed --config "${CT_CONFIGFILE}")
# if [[ -n "$changed" ]]; then
# echo "changed=true" >> $GITHUB_OUTPUT
# fi
# - name: Run chart-testing (lint)
# run: ct lint --config "${CT_CONFIGFILE}" --check-version-increment=false
# - name: Create kind cluster
# uses: helm/kind-action@v1
# if: steps.list-changed.outputs.changed == 'true'
# with:
# config: "${{ github.workspace }}/.github/configs/cluster-config.yaml"
# - name: Run chart-testing (install)
# run: |
# changed=$(ct list-changed --config "${CT_CONFIGFILE}")
# ct install --config "${CT_CONFIGFILE}"

View File

@ -1,175 +0,0 @@
name: Release Helm chart
on:
workflow_dispatch:
env:
CR_CONFIGFILE: "${{ github.workspace }}/source/.github/configs/cr.yaml"
CT_CONFIGFILE: "${{ github.workspace }}/source/.github/configs/ct.yaml"
CR_INDEX_PATH: "${{ github.workspace }}/.cr-index"
CR_PACKAGE_PATH: "${{ github.workspace }}/.cr-release-packages"
CR_TOOL_PATH: "${{ github.workspace }}/.cr-tool"
CR_VERSION: "1.5.0"
jobs:
setup:
runs-on: ubuntu-latest
outputs:
changed: ${{ steps.list-changed.outputs.changed }}
chartpath: ${{ steps.list-changed.outputs.chartpath }}
steps:
- name: Checkout
uses: actions/checkout@v4
with:
fetch-depth: 0
path: source
- name: Install chart-testing
uses: helm/chart-testing-action@v2
- name: List changed charts
id: list-changed
run: |
cd source
latest_tag=$( if ! git describe --tags --abbrev=0 --match='helm-chart/*' 2> /dev/null ; then git rev-list --max-parents=0 --first-parent HEAD; fi )
echo "Running: ct list-changed --config ${CT_CONFIGFILE} --since ${latest_tag} --target-branch ${{ github.ref_name }}"
changed=$(ct list-changed --config "${CT_CONFIGFILE}" --since "${latest_tag}" --target-branch "${{ github.ref_name }}")
echo "${changed}"
num_changed=$(wc -l <<< ${changed})
if [[ "${num_changed}" -gt "1" ]] ; then
echo "More than one chart changed, exiting"
exit 1
fi
if [[ -n "${changed}" ]]; then
name=$(yq ".name" < ${changed}/Chart.yaml)
version=$(yq ".version" < ${changed}/Chart.yaml)
tagname="v${version}"
if [ $(git tag -l "${tagname}") ]; then
echo "Tag ${tagname} already exists, skipping release"
echo "changed=false" >> $GITHUB_OUTPUT
else
echo "Releasing ${changed}"
echo "changed=true" >> $GITHUB_OUTPUT
echo "chartpath=${changed}" >> $GITHUB_OUTPUT
fi
else
echo "No charts have changed, skipping release"
echo "changed=false" >> $GITHUB_OUTPUT
fi
release:
needs: [setup]
runs-on: ubuntu-latest
if: needs.setup.outputs.changed == 'true'
permissions:
contents: write
id-token: write
steps:
- id: get-secrets
uses: grafana/shared-workflows/actions/get-vault-secrets@main
with:
# Secrets placed in the ci/repo/grafana/<repo>/<path> path in Vault
repo_secrets: |
APP_ID=github-app:app-id
PRIVATE_KEY=github-app:private-key
- uses: actions/create-github-app-token@v1
id: app-token
with:
app-id: ${{ env.APP_ID }}
private-key: ${{ env.PRIVATE_KEY }}
owner: ${{ github.repository_owner }}
- name: Checkout
uses: actions/checkout@v4
with:
fetch-depth: 0
path: source
- name: Configure Git
run: |
cd source
git config user.name "$GITHUB_ACTOR"
git config user.email "$GITHUB_ACTOR@users.noreply.github.com"
- name: Checkout helm-charts
uses: actions/checkout@v4
with:
fetch-depth: 0
repository: grafana/helm-charts
path: helm-charts
token: "${{ steps.app-token.outputs.token }}"
- name: Configure Git for helm-charts
run: |
cd helm-charts
git config user.name "$GITHUB_ACTOR"
git config user.email "$GITHUB_ACTOR@users.noreply.github.com"
- name: Set up Helm
uses: azure/setup-helm@v4
- name: Parse Chart.yaml
id: parse-chart
run: |
cd source
changed="${{ needs.setup.outputs.chartpath }}"
description=$(yq ".description" < ${changed}/Chart.yaml)
name=$(yq ".name" < ${changed}/Chart.yaml)
version=$(yq ".version" < ${changed}/Chart.yaml)
echo "chartpath=${changed}" >> $GITHUB_OUTPUT
echo "desc=${description}" >> $GITHUB_OUTPUT
echo "tagname=v${version}" >> $GITHUB_OUTPUT
echo "packagename=${name}-${version}" >> $GITHUB_OUTPUT
- name: Install CR tool
run: |
mkdir "${CR_TOOL_PATH}"
mkdir "${CR_PACKAGE_PATH}"
mkdir "${CR_INDEX_PATH}"
curl -sSLo cr.tar.gz "https://github.com/helm/chart-releaser/releases/download/v${CR_VERSION}/chart-releaser_${CR_VERSION}_linux_amd64.tar.gz"
tar -xzf cr.tar.gz -C "${CR_TOOL_PATH}"
rm -f cr.tar.gz
- name: Create Helm package
run: |
cd source
helm repo add grafana https://grafana.github.io/helm-charts
helm repo add minio https://charts.min.io
"${CR_TOOL_PATH}/cr" package "${{ steps.parse-chart.outputs.chartpath }}" --config "${CR_CONFIGFILE}" --package-path "${CR_PACKAGE_PATH}"
- name: Make a release on this repo
uses: softprops/action-gh-release@v2
with:
name: ${{ steps.parse-chart.outputs.tagname }}
repository: grafana/meta-monitoring-chart
tag_name: ${{ steps.parse-chart.outputs.tagname }}
token: ${{ steps.app-token.outputs.token }}
generate_release_notes: true
files: |
${{ env.CR_PACKAGE_PATH }}/${{ steps.parse-chart.outputs.packagename }}.tgz
# Note that this creates a release in grafana/helm-charts with a new tag.
# The tag name in grafana/helm-charts is <package>-<version>, while the
# tag name for grafana/meta-monitoring-chart is <version>.
- name: Make release on Helm Charts
uses: softprops/action-gh-release@v2
with:
name: ${{ steps.parse-chart.outputs.packagename }}
repository: grafana/helm-charts
tag_name: ${{ steps.parse-chart.outputs.packagename }}
token: ${{ steps.app-token.outputs.token }}
body: |
${{ steps.parse-chart.outputs.desc }}
Source commit: https://github.com/${{ github.repository }}/commit/${{ github.sha }}
Tag on source: https://github.com/${{ github.repository }}/releases/tag/${{ steps.parse-chart.outputs.tagname }}
files: |
${{ env.CR_PACKAGE_PATH }}/${{ steps.parse-chart.outputs.packagename }}.tgz
- name: Update helm-charts index.yaml
run: |
cd helm-charts
"${CR_TOOL_PATH}/cr" index --config "${CR_CONFIGFILE}" --token "${{ steps.app-token.outputs.token }}" --index-path "${CR_INDEX_PATH}" --package-path "${CR_PACKAGE_PATH}" --push

1
.gitignore vendored
View File

@ -1 +0,0 @@
.DS_Store

View File

@ -1,10 +0,0 @@
# Adapted from https://www.thapaliya.com/en/writings/well-documented-makefiles/
.PHONY: help
help: ## Display this help and any documented user-facing targets. Other undocumented targets may be present in the Makefile.
help:
@awk 'BEGIN {FS = ":.*##"; printf "\nUsage:\n make <target>\n\nTargets:\n"} /^[a-zA-Z_-]+:.*?##/ { printf " %-45s %s\n", $$1, $$2 }' $(MAKEFILE_LIST)
.PHONY: helm-lint
helm-lint: ## Run helm linter
$(MAKE) -BC charts/meta-monitoring lint

View File

@ -1,41 +1,42 @@
# meta-monitoring-chart # meta-monitoring-chart
This is a meta-monitoring chart for Loki, specifically Loki installed via the Loki helm chart. This is a meta-monitoring chart for GEL, GEM and GET. It should be installed in a
separate namespace next to GEM, GEL or GET installations.
## Preparation
Create a values.yaml file based on the [default one](../charts/meta-monitoring/values.yaml).
1. Add or remove the namespaces to monitor in the `namespacesToMonitor` setting
1. Set the cluster name in the `clusterName` setting. This will be added as a label to all logs, metrics and traces.
1. Create a `meta` namespace.
## Local and cloud modes ## Local and cloud modes
The chart has 2 modes: local and cloud. In the local mode logs, metrics and/or traces are sent The chart has 2 modes: local and cloud. In the local mode logs, metrics and traces are sent
to small Loki, Mimir and Tempo installations running in the meta-monitoring namespace. to small Loki, Mimir and Tempo installations running in the meta-monitoring namespace.
![local mode](docs/images/Meta%20monitoring%20local.png) ![local mode](docs/images/Meta%20monitoring%20local.png)
In the cloud mode the logs, metrics and/or traces are sent to Grafana Cloud. To enable local mode set `local.enabled` to true.
In the cloud mode the logs, metrics and traces are sent to
![cloud mode](docs/images/Meta%20monitoring%20cloud.png) ![cloud mode](docs/images/Meta%20monitoring%20cloud.png)
Both modes can be enabled at the same time. Cloud mode is preferred. To enable cloud mode set `cloud.enabled` to true. The `endpoint`, `username` and `password` settings for your Grafana Cloud logs, metrics and traces instances have to be filled in as well.
Both modes can be enabled at the same time.
## Installation ## Installation
For more instructions including how to install the chart go to the [installation](docs/installation.md) page. ```
helm install -n meta -f values.yaml meta ./charts/meta-monitoring
```
## Supported features For more instructions including how to update the chart go to the [installation](docs/installation.md) page.
- Specify which namespaces are monitored
- Specify if logs, metrics or traces should be enabled for cloud or local
- Specify the cluster name used for the logs, metrics and traces
- Specify PII regexes that are applied to logs before they are sent to Loki (cloud or local). The capture group in the regex is replaced with *****.
- a Grafana instance is installed (when local mode is used) with the relevant datasources installed. The following dashboards are installed:
- logs dashboards
- Alloy dashboards
Most of these features are enabled by default. See the values.yaml file for how to enable/disable them.
## Caveats
- This has not been tested on Openshift yet.
- The underlying Loki, Mimir and Tempo are at the default size installed by the Helm chart. This might need changing when monitoring bigger Loki, Mimir or Tempo installations.
- MinIO is used as storage for the local mode at the moment with a limited retention. At the moment this chart cannot be used for monitoring over longer periods.
## Developer help topics ## Developer help topics

View File

@ -1,18 +1,18 @@
dependencies: dependencies:
- name: loki - name: loki
repository: https://grafana.github.io/helm-charts repository: https://grafana.github.io/helm-charts
version: 6.29.0 version: 5.8.0
- name: alloy - name: grafana-agent
repository: https://grafana.github.io/helm-charts repository: https://grafana.github.io/helm-charts
version: 0.12.5 version: 0.15.0
- name: mimir-distributed - name: mimir-distributed
repository: https://grafana.github.io/helm-charts repository: https://grafana.github.io/helm-charts
version: 5.6.0 version: 4.4.1
- name: tempo-distributed - name: tempo-distributed
repository: https://grafana.github.io/helm-charts repository: https://grafana.github.io/helm-charts
version: 1.33.0 version: 1.4.7
- name: minio - name: minio
repository: https://charts.min.io repository: https://charts.min.io
version: 5.4.0 version: 5.0.11
digest: sha256:5225a03d9003384639f5d43b1971126371269347f16f221b7aed377ab85d71be digest: sha256:4b04084e6fe821c4d481017b2430f7c8cd782a5d60830dd3a24eb8f10a9ece09
generated: "2025-03-27T07:03:11.17404081Z" generated: "2023-06-29T14:25:07.247853+01:00"

View File

@ -1,6 +1,7 @@
apiVersion: v2 apiVersion: v2
name: meta-monitoring name: meta-monitoring
description: A Helm chart for meta monitoring Grafana Loki, Mimir and Tempo description: A Helm chart for meta monitoring Grafana Loki, Mimir and Tempo
# A chart can be either an 'application' or a 'library' chart. # A chart can be either an 'application' or a 'library' chart.
# #
# Application charts are a collection of templates that can be packaged into versioned archives # Application charts are a collection of templates that can be packaged into versioned archives
@ -10,32 +11,35 @@ description: A Helm chart for meta monitoring Grafana Loki, Mimir and Tempo
# a dependency of application charts to inject those utilities and functions into the rendering # a dependency of application charts to inject those utilities and functions into the rendering
# pipeline. Library charts do not define any templates and therefore cannot be deployed. # pipeline. Library charts do not define any templates and therefore cannot be deployed.
type: application type: application
# This is the chart version. This version number should be incremented each time you make changes # This is the chart version. This version number should be incremented each time you make changes
# to the chart and its templates, including the app version. # to the chart and its templates, including the app version.
# Versions are expected to follow Semantic Versioning (https://semver.org/) # Versions are expected to follow Semantic Versioning (https://semver.org/)
version: 1.3.0 version: 0.0.1
# This is the version number of the application being deployed. This version number should be # This is the version number of the application being deployed. This version number should be
# incremented each time you make changes to the application. Versions are not expected to # incremented each time you make changes to the application. Versions are not expected to
# follow Semantic Versioning. They should reflect the version the application is using. # follow Semantic Versioning. They should reflect the version the application is using.
# It is recommended to use it with quotes. # It is recommended to use it with quotes.
appVersion: "0.0.1" appVersion: "0.0.1"
dependencies: dependencies:
- name: loki - name: loki
repository: https://grafana.github.io/helm-charts repository: https://grafana.github.io/helm-charts
version: 6.29.0 version: "5.8.0"
condition: local.logs.enabled condition: local.logs.enabled
- name: alloy - name: grafana-agent
repository: https://grafana.github.io/helm-charts repository: https://grafana.github.io/helm-charts
version: 0.12.5 version: "0.15.0"
- name: mimir-distributed - name: mimir-distributed
repository: https://grafana.github.io/helm-charts repository: https://grafana.github.io/helm-charts
version: 5.6.0 version: "4.4.1"
condition: local.metrics.enabled condition: local.metrics.enabled
- name: tempo-distributed - name: tempo-distributed
repository: https://grafana.github.io/helm-charts repository: https://grafana.github.io/helm-charts
version: 1.33.0 version: "1.4.7"
condition: local.traces.enabled condition: local.traces.enabled
- name: minio - name: minio
repository: https://charts.min.io repository: https://charts.min.io
version: 5.4.0 version: "5.0.11"
condition: local.minio.enabled condition: local.minio.enabled

View File

@ -1,7 +0,0 @@
.DEFAULT_GOAL := lint
.PHONY: lint lint-yaml
lint: lint-yaml
lint-yaml:
yamllint -c $(CURDIR)/src/.yamllint.yaml $(CURDIR)/src

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@ -1,11 +0,0 @@
---
remote: origin
target-branch: main
chart-dirs:
- charts
chart-repos:
- grafana=https://grafana.github.io/helm-charts
- minio=https://charts.min.io
helm-extra-args: --timeout 1200s
check-version-increment: false
validate-maintainers: false

View File

@ -1,4 +0,0 @@
---
rules:
quoted-strings:
required: true

File diff suppressed because it is too large Load Diff

View File

@ -1,540 +0,0 @@
{
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": {
"type": "grafana",
"uid": "-- Grafana --"
},
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"type": "dashboard"
},
{
"datasource": "$loki_datasource",
"enable": true,
"expr": "{cluster=\"$cluster\", container=\"kube-diff-logger\"} | json | namespace_extracted=\"alloy\" | name_extracted=~\"alloy.*\"",
"iconColor": "rgba(0, 211, 255, 1)",
"instant": false,
"name": "Deployments",
"titleFormat": "{{cluster}}/{{namespace}}"
}
]
},
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 1,
"id": 27,
"links": [
{
"icon": "doc",
"targetBlank": true,
"title": "Documentation",
"tooltip": "Clustering documentation",
"type": "link",
"url": "https://grafana.com/docs/alloy/latest/reference/cli/run/#clustered-mode"
},
{
"asDropdown": true,
"icon": "external link",
"includeVars": true,
"keepTime": true,
"tags": [
"alloy-mixin"
],
"targetBlank": false,
"title": "Dashboards",
"type": "dashboards"
}
],
"panels": [
{
"datasource": "${datasource}",
"fieldConfig": {
"defaults": {
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 9,
"w": 8,
"x": 0,
"y": 0
},
"id": 1,
"options": {
"colorMode": "value",
"graphMode": "area",
"justifyMode": "auto",
"orientation": "auto",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"showPercentChange": false,
"textMode": "auto",
"wideLayout": true
},
"pluginVersion": "10.4.2",
"targets": [
{
"datasource": "${datasource}",
"expr": "count(cluster_node_info{cluster=\"$cluster\", namespace=\"$namespace\"})",
"instant": true,
"legendFormat": "__auto",
"range": false,
"refId": "A"
}
],
"title": "Nodes",
"type": "stat"
},
{
"datasource": "${datasource}",
"description": "Nodes info.\n",
"fieldConfig": {
"defaults": {
"custom": {
"align": "auto",
"cellOptions": {
"type": "auto"
},
"inspect": false
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "Dashboard"
},
"properties": [
{
"id": "mappings",
"value": [
{
"options": {
"1": {
"index": 0,
"text": "Link"
}
},
"type": "value"
}
]
},
{
"id": "links",
"value": [
{
"targetBlank": false,
"title": "Detail dashboard for node",
"url": "/d/4047e755d822da63c8158cde32ae4dce/alloy-cluster-node?var-instance=${__data.fields.instance}&var-datasource=${datasource}&var-loki_datasource=${loki_datasource}&var-cluster=${cluster}&var-namespace=${namespace}"
}
]
}
]
}
]
},
"gridPos": {
"h": 9,
"w": 16,
"x": 8,
"y": 0
},
"id": 2,
"options": {
"cellHeight": "sm",
"footer": {
"countRows": false,
"fields": "",
"reducer": [
"sum"
],
"show": false
},
"showHeader": true
},
"pluginVersion": "10.4.2",
"targets": [
{
"datasource": "${datasource}",
"expr": "cluster_node_info{cluster=\"$cluster\", namespace=\"$namespace\"}",
"format": "table",
"instant": true,
"legendFormat": "__auto",
"range": false,
"refId": "A"
}
],
"title": "Node table",
"transformations": [
{
"id": "organize",
"options": {
"excludeByName": {
"Time": true,
"Value": false,
"__name__": true,
"cluster": true,
"namespace": true,
"state": false
},
"indexByName": {},
"renameByName": {
"Value": "Dashboard",
"instance": "",
"state": ""
}
}
}
],
"type": "table"
},
{
"datasource": "${datasource}",
"description": "Whether the cluster state has converged.\n\nIt is normal for the cluster state to be diverged briefly as gossip events propagate. It is not normal for the cluster state to be diverged for a long period of time.\n\nThis will show one of the following:\n\n* Converged: Nodes are aware of all other nodes, with the correct states.\n* Not converged: A subset of nodes aren't aware of their peers, or don't have an updated view of peer states.\n",
"fieldConfig": {
"defaults": {
"mappings": [
{
"options": {
"1": {
"color": "red",
"index": 1,
"text": "Not converged"
}
},
"type": "value"
},
{
"options": {
"match": "null",
"result": {
"color": "green",
"index": 0,
"text": "Converged"
}
},
"type": "special"
}
],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "suffix:nodes"
},
"overrides": []
},
"gridPos": {
"h": 9,
"w": 8,
"x": 0,
"y": 9
},
"id": 3,
"options": {
"colorMode": "background",
"graphMode": "none",
"justifyMode": "auto",
"orientation": "auto",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"showPercentChange": false,
"textMode": "auto",
"wideLayout": true
},
"pluginVersion": "10.4.2",
"targets": [
{
"datasource": "${datasource}",
"expr": "clamp((\n sum(stddev by (state) (cluster_node_peers{cluster=\"$cluster\", namespace=\"$namespace\"}) != 0) or\n (sum(abs(sum without (state) (cluster_node_peers{cluster=\"$cluster\", namespace=\"$namespace\"})) - scalar(count(cluster_node_info{cluster=\"$cluster\", namespace=\"$namespace\"})) != 0))\n ),\n 1, 1\n)\n",
"format": "time_series",
"instant": true,
"legendFormat": "__auto",
"range": false,
"refId": "A"
}
],
"title": "Convergance state",
"type": "stat"
},
{
"datasource": "${datasource}",
"fieldConfig": {
"defaults": {
"color": {
"mode": "continuous-GrYlRd"
},
"custom": {
"fillOpacity": 80,
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineWidth": 0,
"spanNulls": true
},
"mappings": [
{
"options": {
"0": {
"color": "green",
"text": "Yes"
}
},
"type": "value"
},
{
"options": {
"1": {
"color": "red",
"text": "No"
}
},
"type": "value"
}
],
"max": 1,
"noValue": 0,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 9,
"w": 16,
"x": 8,
"y": 9
},
"id": 4,
"options": {
"alignValue": "left",
"legend": {
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"mergeValues": true,
"rowHeight": 0.9,
"showValue": "auto",
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": "${datasource}",
"expr": "ceil(clamp((\n sum(stddev by (state) (cluster_node_peers{cluster=\"$cluster\", namespace=\"$namespace\"})) or\n (sum(abs(sum without (state) (cluster_node_peers{cluster=\"$cluster\", namespace=\"$namespace\"})) - scalar(count(cluster_node_info{cluster=\"$cluster\", namespace=\"$namespace\"}))))\n ),\n 0, 1\n))\n",
"instant": false,
"legendFormat": "Converged",
"range": true,
"refId": "A"
}
],
"title": "Convergance state timeline",
"type": "state-timeline"
}
],
"refresh": "10s",
"schemaVersion": 39,
"tags": [
"alloy-mixin"
],
"templating": {
"list": [
{
"current": {
"selected": false,
"text": "Mimir",
"value": "mimir_ds"
},
"hide": 0,
"includeAll": false,
"label": "Data Source",
"multi": false,
"name": "datasource",
"options": [],
"query": "prometheus",
"refresh": 1,
"regex": "",
"skipUrlSync": false,
"sort": 2,
"type": "datasource"
},
{
"current": {
"selected": false,
"text": "Loki",
"value": "loki_ds"
},
"hide": 0,
"includeAll": false,
"label": "Loki Data Source",
"multi": false,
"name": "loki_datasource",
"options": [],
"query": "loki",
"refresh": 1,
"regex": "",
"skipUrlSync": false,
"sort": 2,
"type": "datasource"
},
{
"current": {
"isNone": true,
"selected": false,
"text": "None",
"value": ""
},
"datasource": {
"uid": "${datasource}"
},
"definition": "",
"hide": 0,
"includeAll": false,
"label": "cluster",
"multi": false,
"name": "cluster",
"options": [],
"query": {
"query": "label_values(alloy_component_controller_running_components, cluster)\n",
"refId": "cluster"
},
"refresh": 2,
"regex": "",
"skipUrlSync": false,
"sort": 2,
"type": "query"
},
{
"current": {
"isNone": true,
"selected": false,
"text": "None",
"value": ""
},
"datasource": {
"uid": "${datasource}"
},
"definition": "",
"hide": 0,
"includeAll": false,
"label": "namespace",
"multi": false,
"name": "namespace",
"options": [],
"query": {
"query": "label_values(alloy_component_controller_running_components{cluster=\"$cluster\"}, namespace)\n",
"refId": "namespace"
},
"refresh": 2,
"regex": "",
"skipUrlSync": false,
"sort": 2,
"type": "query"
}
]
},
"time": {
"from": "now-1h",
"to": "now"
},
"timepicker": {
"refresh_intervals": [
"5s",
"10s",
"30s",
"1m",
"5m",
"15m",
"30m",
"1h",
"2h",
"1d"
],
"time_options": [
"5m",
"15m",
"1h",
"6h",
"12h",
"24h",
"2d",
"7d",
"30d",
"90d"
]
},
"timezone": "",
"title": "Alloy / Cluster Overview",
"uid": "",
"version": 0,
"weekStart": ""
}

View File

@ -1,970 +0,0 @@
{
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": {
"type": "grafana",
"uid": "-- Grafana --"
},
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"type": "dashboard"
},
{
"datasource": "$loki_datasource",
"enable": true,
"expr": "{cluster=\"$cluster\", container=\"kube-diff-logger\"} | json | namespace_extracted=\"alloy\" | name_extracted=~\"alloy.*\"",
"iconColor": "rgba(0, 211, 255, 1)",
"instant": false,
"name": "Deployments",
"titleFormat": "{{cluster}}/{{namespace}}"
}
]
},
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 1,
"id": 28,
"links": [
{
"icon": "doc",
"targetBlank": true,
"title": "Documentation",
"tooltip": "Component controller documentation",
"type": "link",
"url": "https://grafana.com/docs/alloy/latest/concepts/component_controller/"
},
{
"asDropdown": true,
"icon": "external link",
"includeVars": true,
"keepTime": true,
"tags": [
"alloy-mixin"
],
"targetBlank": false,
"title": "Dashboards",
"type": "dashboards"
}
],
"panels": [
{
"datasource": "${datasource}",
"description": "The number of Alloy instances whose metrics are being sent and reported.\n",
"fieldConfig": {
"defaults": {
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "instances"
},
"overrides": []
},
"gridPos": {
"h": 4,
"w": 10,
"x": 0,
"y": 0
},
"id": 1,
"options": {
"colorMode": "none",
"graphMode": "none",
"justifyMode": "auto",
"orientation": "auto",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"showPercentChange": false,
"textMode": "auto",
"wideLayout": true
},
"pluginVersion": "10.4.2",
"targets": [
{
"datasource": "${datasource}",
"expr": "count(alloy_component_controller_evaluating{cluster=\"$cluster\", namespace=\"$namespace\"})",
"instant": false,
"legendFormat": "__auto",
"range": true,
"refId": "A"
}
],
"title": "Running instances",
"type": "stat"
},
{
"datasource": "${datasource}",
"description": "Breakdown of components by health across all running instances.\n\n* Healthy: components have been evaluated completely and are reporting themselves as healthy.\n* Unhealthy: Components either could not be evaluated or are reporting themselves as unhealthy.\n* Unknown: A component has been created but has not yet been started.\n* Exited: A component has exited. It will not return to the running state.\n\nMore information on a component's health state can be retrieved using\nthe Alloy UI.\n\nNote that components may be in a degraded state even if they report\nthemselves as healthy. Use component-specific dashboards and alerts\nto observe detailed information about the behavior of a component.\n",
"fieldConfig": {
"defaults": {
"mappings": [],
"min": 0,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
}
]
}
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "Unhealthy"
},
"properties": [
{
"id": "thresholds",
"value": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 1
}
]
}
}
]
},
{
"matcher": {
"id": "byName",
"options": "Unknown"
},
"properties": [
{
"id": "thresholds",
"value": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "blue",
"value": 1
}
]
}
}
]
},
{
"matcher": {
"id": "byName",
"options": "Exited"
},
"properties": [
{
"id": "thresholds",
"value": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "orange",
"value": 1
}
]
}
}
]
}
]
},
"gridPos": {
"h": 12,
"w": 14,
"x": 10,
"y": 0
},
"id": 4,
"options": {
"displayMode": "gradient",
"maxVizHeight": 300,
"minVizHeight": 16,
"minVizWidth": 8,
"namePlacement": "auto",
"orientation": "vertical",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"showUnfilled": true,
"sizing": "auto",
"valueMode": "color"
},
"pluginVersion": "10.4.2",
"targets": [
{
"datasource": "${datasource}",
"expr": "sum(alloy_component_controller_running_components{cluster=\"$cluster\", namespace=\"$namespace\", health_type=\"healthy\"}) or vector(0)",
"instant": true,
"legendFormat": "Healthy",
"range": false,
"refId": "A"
},
{
"datasource": "${datasource}",
"expr": "sum(alloy_component_controller_running_components{cluster=\"$cluster\", namespace=\"$namespace\", health_type=\"unhealthy\"}) or vector(0)",
"instant": true,
"legendFormat": "Unhealthy",
"range": false,
"refId": "B"
},
{
"datasource": "${datasource}",
"expr": "sum(alloy_component_controller_running_components{cluster=\"$cluster\", namespace=\"$namespace\", health_type=\"unknown\"}) or vector(0)",
"instant": true,
"legendFormat": "Unknown",
"range": false,
"refId": "C"
},
{
"datasource": "${datasource}",
"expr": "sum(alloy_component_controller_running_components{cluster=\"$cluster\", namespace=\"$namespace\", health_type=\"exited\"}) or vector(0)",
"instant": true,
"legendFormat": "Exited",
"range": false,
"refId": "D"
}
],
"title": "Components by health",
"type": "bargauge"
},
{
"datasource": "${datasource}",
"description": "The number of running components across all running instances.\n",
"fieldConfig": {
"defaults": {
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "components"
},
"overrides": []
},
"gridPos": {
"h": 4,
"w": 10,
"x": 0,
"y": 4
},
"id": 2,
"options": {
"colorMode": "none",
"graphMode": "none",
"justifyMode": "auto",
"orientation": "auto",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"showPercentChange": false,
"textMode": "auto",
"wideLayout": true
},
"pluginVersion": "10.4.2",
"targets": [
{
"datasource": "${datasource}",
"expr": "sum(alloy_component_controller_running_components{cluster=\"$cluster\", namespace=\"$namespace\"})",
"instant": false,
"legendFormat": "__auto",
"range": true,
"refId": "A"
}
],
"title": "Running components",
"type": "stat"
},
{
"datasource": "${datasource}",
"description": "The percentage of components which are in a healthy state.\n",
"fieldConfig": {
"defaults": {
"mappings": [],
"max": 1,
"min": 0,
"noValue": "No components",
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "percentunit"
},
"overrides": []
},
"gridPos": {
"h": 4,
"w": 10,
"x": 0,
"y": 8
},
"id": 3,
"options": {
"colorMode": "value",
"graphMode": "area",
"justifyMode": "auto",
"orientation": "auto",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"showPercentChange": false,
"text": {
"valueSize": 80
},
"textMode": "auto",
"wideLayout": true
},
"pluginVersion": "10.4.2",
"targets": [
{
"datasource": "${datasource}",
"expr": "sum(alloy_component_controller_running_components{cluster=\"$cluster\", namespace=\"$namespace\",health_type=\"healthy\"}) /\nsum(alloy_component_controller_running_components{cluster=\"$cluster\", namespace=\"$namespace\"})\n",
"instant": false,
"legendFormat": "__auto",
"range": true,
"refId": "A"
}
],
"title": "Overall component health",
"type": "stat"
},
{
"datasource": "${datasource}",
"description": "The frequency at which components get updated.\n",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "points",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 3,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "ops"
},
"overrides": []
},
"gridPos": {
"h": 10,
"w": 8,
"x": 0,
"y": 12
},
"id": 5,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"targets": [
{
"datasource": "${datasource}",
"expr": "sum by (instance) (rate(alloy_component_evaluation_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))",
"instant": false,
"legendFormat": "__auto",
"range": true,
"refId": "A"
}
],
"title": "Component evaluation rate",
"type": "timeseries"
},
{
"datasource": "${datasource}",
"description": "The percentiles for how long it takes to complete component evaluations.\n\nComponent evaluations must complete for components to have the latest\narguments. The longer the evaluations take, the slower it will be to\nreconcile the state of components.\n\nIf evaluation is taking too long, consider sharding your components to\ndeal with smaller amounts of data and reuse data as much as possible.\n",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "s"
},
"overrides": []
},
"gridPos": {
"h": 10,
"w": 8,
"x": 8,
"y": 12
},
"id": 6,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": "${datasource}",
"expr": "histogram_quantile(0.99, sum(rate(alloy_component_evaluation_seconds{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval])))\nor\nhistogram_quantile(0.99, sum by (le) (rate(alloy_component_evaluation_seconds_bucket{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval])))\n",
"instant": false,
"legendFormat": "99th percentile",
"range": true,
"refId": "A"
},
{
"datasource": "${datasource}",
"expr": "histogram_quantile(0.50, sum(rate(alloy_component_evaluation_seconds{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval])))\nor\nhistogram_quantile(0.50, sum by (le) (rate(alloy_component_evaluation_seconds_bucket{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval])))\n",
"instant": false,
"legendFormat": "50th percentile",
"range": true,
"refId": "B"
},
{
"datasource": "${datasource}",
"expr": "(\n histogram_sum(sum(rate(alloy_component_evaluation_seconds{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval]))) /\n histogram_count(sum(rate(alloy_component_evaluation_seconds{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval])))\n)\nor\n(\n sum(rate(alloy_component_evaluation_seconds_sum{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval])) /\n sum(rate(alloy_component_evaluation_seconds_count{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval]))\n)\n",
"instant": false,
"legendFormat": "Average",
"range": true,
"refId": "C"
}
],
"title": "Component evaluation time",
"type": "timeseries"
},
{
"datasource": "${datasource}",
"description": "The percentage of time spent evaluating 'slow' components - components that took longer than 1 minute to evaluate.\n\nIdeally, no component should take more than 1 minute to evaluate. The components displayed in this chart\nmay be a sign of a problem with the pipeline.\n",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "percentunit"
},
"overrides": []
},
"gridPos": {
"h": 10,
"w": 8,
"x": 16,
"y": 12
},
"id": 7,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": "${datasource}",
"expr": "sum by (component_path, component_id) (rate(alloy_component_evaluation_slow_seconds{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))\n/ scalar(sum(rate(alloy_component_evaluation_seconds_sum{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])))\n",
"instant": false,
"legendFormat": "{{component path}} {{component_id}}",
"range": true,
"refId": "A"
}
],
"title": "Slow components evaluation times",
"type": "timeseries"
},
{
"datasource": "${datasource}",
"description": "Detailed histogram view of how long component evaluations take.\n\nThe goal is to design your config so that evaluations take as little\ntime as possible; under 100ms is a good goal.\n",
"fieldConfig": {
"defaults": {
"custom": {
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"scaleDistribution": {
"type": "linear"
}
}
},
"overrides": []
},
"gridPos": {
"h": 10,
"w": 8,
"x": 0,
"y": 22
},
"id": 8,
"maxDataPoints": 30,
"options": {
"calculate": false,
"cellGap": 0,
"color": {
"exponent": 0.5,
"fill": "dark-orange",
"mode": "scheme",
"reverse": false,
"scale": "exponential",
"scheme": "Spectral",
"steps": 64
},
"exemplars": {
"color": "rgba(255,0,255,0.7)"
},
"filterValues": {
"le": 0.1
},
"legend": {
"show": true
},
"rowsFrame": {
"layout": "auto"
},
"tooltip": {
"mode": "single",
"showColorScale": false,
"yHistogram": true
},
"yAxis": {
"axisPlacement": "left",
"reverse": false,
"unit": "s"
}
},
"pluginVersion": "10.4.2",
"targets": [
{
"datasource": "${datasource}",
"expr": "sum(increase(alloy_component_evaluation_seconds{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))\nor ignoring (le)\nsum by (le) (increase(alloy_component_evaluation_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))\n",
"format": "heatmap",
"instant": false,
"legendFormat": "{{le}}",
"range": true,
"refId": "A"
}
],
"title": "Component evaluation histogram",
"type": "heatmap"
},
{
"datasource": "${datasource}",
"description": "Detailed histogram of how long components wait to be evaluated after their dependency is updated.\n\nThe goal is to design your config so that most of the time components do not\nqueue for long; under 10ms is a good goal.\n",
"fieldConfig": {
"defaults": {
"custom": {
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"scaleDistribution": {
"type": "linear"
}
}
},
"overrides": []
},
"gridPos": {
"h": 10,
"w": 8,
"x": 8,
"y": 22
},
"id": 9,
"maxDataPoints": 30,
"options": {
"calculate": false,
"cellGap": 0,
"color": {
"exponent": 0.5,
"fill": "dark-orange",
"mode": "scheme",
"reverse": false,
"scale": "exponential",
"scheme": "Spectral",
"steps": 64
},
"exemplars": {
"color": "rgba(255,0,255,0.7)"
},
"filterValues": {
"le": 0.1
},
"legend": {
"show": true
},
"rowsFrame": {
"layout": "auto"
},
"tooltip": {
"mode": "single",
"showColorScale": false,
"yHistogram": true
},
"yAxis": {
"axisPlacement": "left",
"reverse": false,
"unit": "s"
}
},
"pluginVersion": "10.4.2",
"targets": [
{
"datasource": "${datasource}",
"expr": "sum(increase(alloy_component_dependencies_wait_seconds{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))\nor ignoring (le)\nsum by (le) (increase(alloy_component_dependencies_wait_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))\n",
"format": "heatmap",
"instant": false,
"legendFormat": "{{le}}",
"range": true,
"refId": "A"
}
],
"title": "Component dependency wait histogram",
"type": "heatmap"
}
],
"refresh": "10s",
"schemaVersion": 39,
"tags": [
"alloy-mixin"
],
"templating": {
"list": [
{
"current": {
"selected": false,
"text": "Mimir",
"value": "mimir_ds"
},
"hide": 0,
"includeAll": false,
"label": "Data Source",
"multi": false,
"name": "datasource",
"options": [],
"query": "prometheus",
"refresh": 1,
"regex": "",
"skipUrlSync": false,
"sort": 2,
"type": "datasource"
},
{
"current": {
"selected": false,
"text": "Loki",
"value": "loki_ds"
},
"hide": 0,
"includeAll": false,
"label": "Loki Data Source",
"multi": false,
"name": "loki_datasource",
"options": [],
"query": "loki",
"refresh": 1,
"regex": "",
"skipUrlSync": false,
"sort": 2,
"type": "datasource"
},
{
"current": {
"isNone": true,
"selected": false,
"text": "None",
"value": ""
},
"datasource": {
"uid": "${datasource}"
},
"definition": "",
"hide": 0,
"includeAll": false,
"label": "cluster",
"multi": false,
"name": "cluster",
"options": [],
"query": {
"query": "label_values(alloy_component_controller_running_components, cluster)\n",
"refId": "cluster"
},
"refresh": 2,
"regex": "",
"skipUrlSync": false,
"sort": 2,
"type": "query"
},
{
"current": {
"isNone": true,
"selected": false,
"text": "None",
"value": ""
},
"datasource": {
"uid": "${datasource}"
},
"definition": "",
"hide": 0,
"includeAll": false,
"label": "namespace",
"multi": false,
"name": "namespace",
"options": [],
"query": {
"query": "label_values(alloy_component_controller_running_components{cluster=\"$cluster\"}, namespace)\n",
"refId": "namespace"
},
"refresh": 2,
"regex": "",
"skipUrlSync": false,
"sort": 2,
"type": "query"
}
]
},
"time": {
"from": "now-1h",
"to": "now"
},
"timepicker": {
"refresh_intervals": [
"5s",
"10s",
"30s",
"1m",
"5m",
"15m",
"30m",
"1h",
"2h",
"1d"
],
"time_options": [
"5m",
"15m",
"1h",
"6h",
"12h",
"24h",
"2d",
"7d",
"30d",
"90d"
]
},
"timezone": "",
"title": "Alloy / Controller",
"uid": "bf9f456aad7108b2c808dbd9973e386f",
"version": 0,
"weekStart": ""
}

View File

@ -1,923 +0,0 @@
{
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": {
"type": "grafana",
"uid": "-- Grafana --"
},
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"type": "dashboard"
}
]
},
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 1,
"id": 25,
"links": [
{
"asDropdown": true,
"icon": "external link",
"includeVars": true,
"keepTime": true,
"tags": [
"alloy-mixin"
],
"targetBlank": false,
"title": "Dashboards",
"type": "dashboards"
}
],
"panels": [
{
"datasource": "${datasource}",
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 0
},
"id": 1,
"title": "Receivers for traces [otelcol.receiver]",
"type": "row"
},
{
"datasource": "${datasource}",
"description": "Number of spans successfully pushed into the pipeline.\n",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 20,
"gradientMode": "hue",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "normal"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 10,
"w": 8,
"x": 0,
"y": 1
},
"id": 2,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": "${datasource}",
"expr": "rate(receiver_accepted_spans_ratio_total{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\"}[$__rate_interval])\n",
"instant": false,
"legendFormat": "{{ pod }} / {{ transport }}",
"range": true,
"refId": "A"
}
],
"title": "Accepted spans",
"type": "timeseries"
},
{
"datasource": "${datasource}",
"description": "Number of spans that could not be pushed into the pipeline.\n",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 20,
"gradientMode": "hue",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "normal"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 10,
"w": 8,
"x": 8,
"y": 1
},
"id": 3,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": "${datasource}",
"expr": "rate(receiver_refused_spans_ratio_total{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\"}[$__rate_interval])\n",
"instant": false,
"legendFormat": "{{ pod }} / {{ transport }}",
"range": true,
"refId": "A"
}
],
"title": "Refused spans",
"type": "timeseries"
},
{
"datasource": "${datasource}",
"description": "The duration of inbound RPCs.\n",
"fieldConfig": {
"defaults": {
"custom": {
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"scaleDistribution": {
"type": "linear"
}
}
},
"overrides": []
},
"gridPos": {
"h": 10,
"w": 8,
"x": 16,
"y": 1
},
"id": 4,
"maxDataPoints": 30,
"options": {
"calculate": false,
"cellGap": 1,
"color": {
"exponent": 0.5,
"fill": "dark-orange",
"mode": "scheme",
"reverse": false,
"scale": "exponential",
"scheme": "Oranges",
"steps": 65
},
"exemplars": {
"color": "rgba(255,0,255,0.7)"
},
"filterValues": {
"le": 1e-9
},
"legend": {
"show": true
},
"rowsFrame": {
"layout": "auto"
},
"tooltip": {
"mode": "single",
"showColorScale": false,
"yHistogram": true
},
"yAxis": {
"axisPlacement": "left",
"reverse": false,
"unit": "ms"
}
},
"pluginVersion": "10.4.2",
"targets": [
{
"datasource": "${datasource}",
"expr": "sum by (le) (increase(rpc_server_duration_milliseconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\", rpc_service=\"opentelemetry.proto.collector.trace.v1.TraceService\"}[$__rate_interval]))",
"format": "heatmap",
"instant": false,
"legendFormat": "{{le}}",
"range": true,
"refId": "A"
}
],
"title": "RPC server duration",
"type": "heatmap"
},
{
"datasource": "${datasource}",
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 11
},
"id": 5,
"title": "Batching of logs, metrics, and traces [otelcol.processor.batch]",
"type": "row"
},
{
"datasource": "${datasource}",
"description": "Number of spans, metric datapoints, or log lines in a batch\n",
"fieldConfig": {
"defaults": {
"custom": {
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"scaleDistribution": {
"type": "linear"
}
}
},
"overrides": []
},
"gridPos": {
"h": 10,
"w": 8,
"x": 0,
"y": 12
},
"id": 6,
"maxDataPoints": 30,
"options": {
"calculate": false,
"cellGap": 1,
"color": {
"exponent": 0.5,
"fill": "dark-orange",
"mode": "scheme",
"reverse": false,
"scale": "exponential",
"scheme": "Oranges",
"steps": 65
},
"exemplars": {
"color": "rgba(255,0,255,0.7)"
},
"filterValues": {
"le": 1e-9
},
"legend": {
"show": true
},
"rowsFrame": {
"layout": "auto"
},
"tooltip": {
"mode": "single",
"showColorScale": false,
"yHistogram": true
},
"yAxis": {
"axisPlacement": "left",
"reverse": false,
"unit": "short"
}
},
"pluginVersion": "10.4.2",
"targets": [
{
"datasource": "${datasource}",
"expr": "sum by (le) (increase(processor_batch_batch_send_size_ratio_bucket{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\"}[$__rate_interval]))",
"format": "heatmap",
"instant": false,
"legendFormat": "{{le}}",
"range": true,
"refId": "A"
}
],
"title": "Number of units in the batch",
"type": "heatmap"
},
{
"datasource": "${datasource}",
"description": "Number of distinct metadata value combinations being processed\n",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 10,
"w": 8,
"x": 8,
"y": 12
},
"id": 7,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": "${datasource}",
"expr": "processor_batch_metadata_cardinality_ratio{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\"}\n",
"instant": false,
"legendFormat": "{{ pod }}",
"range": true,
"refId": "A"
}
],
"title": "Distinct metadata values",
"type": "timeseries"
},
{
"datasource": "${datasource}",
"description": "Number of times the batch was sent due to a timeout trigger\n",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 10,
"w": 8,
"x": 16,
"y": 12
},
"id": 8,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": "${datasource}",
"expr": "rate(processor_batch_timeout_trigger_send_ratio_total{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\"}[$__rate_interval])\n",
"instant": false,
"legendFormat": "{{ pod }}",
"range": true,
"refId": "A"
}
],
"title": "Timeout trigger",
"type": "timeseries"
},
{
"datasource": "${datasource}",
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 22
},
"id": 9,
"title": "Exporters for traces [otelcol.exporter]",
"type": "row"
},
{
"datasource": "${datasource}",
"description": "Number of spans successfully sent to destination.\n",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 20,
"gradientMode": "hue",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "normal"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 10,
"w": 8,
"x": 0,
"y": 23
},
"id": 10,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": "${datasource}",
"expr": "rate(exporter_sent_spans_ratio_total{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\"}[$__rate_interval])\n",
"instant": false,
"legendFormat": "{{ pod }}",
"range": true,
"refId": "A"
}
],
"title": "Exported sent spans",
"type": "timeseries"
},
{
"datasource": "${datasource}",
"description": "Number of spans in failed attempts to send to destination.\n",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 20,
"gradientMode": "hue",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "normal"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 10,
"w": 8,
"x": 8,
"y": 23
},
"id": 11,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": "${datasource}",
"expr": "rate(exporter_send_failed_spans_ratio_total{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\"}[$__rate_interval])\n",
"instant": false,
"legendFormat": "{{ pod }}",
"range": true,
"refId": "A"
}
],
"title": "Exported failed spans",
"type": "timeseries"
}
],
"refresh": "10s",
"schemaVersion": 39,
"tags": [
"alloy-mixin"
],
"templating": {
"list": [
{
"current": {
"selected": false,
"text": "Mimir",
"value": "mimir_ds"
},
"hide": 0,
"includeAll": false,
"label": "Data Source",
"multi": false,
"name": "datasource",
"options": [],
"query": "prometheus",
"refresh": 1,
"regex": "",
"skipUrlSync": false,
"sort": 2,
"type": "datasource"
},
{
"current": {
"selected": false,
"text": "Loki",
"value": "loki_ds"
},
"hide": 0,
"includeAll": false,
"label": "Loki Data Source",
"multi": false,
"name": "loki_datasource",
"options": [],
"query": "loki",
"refresh": 1,
"regex": "",
"skipUrlSync": false,
"sort": 2,
"type": "datasource"
},
{
"current": {
"isNone": true,
"selected": false,
"text": "None",
"value": ""
},
"datasource": {
"uid": "${datasource}"
},
"definition": "",
"hide": 0,
"includeAll": false,
"label": "cluster",
"multi": false,
"name": "cluster",
"options": [],
"query": {
"query": "label_values(alloy_component_controller_running_components, cluster)\n",
"refId": "cluster"
},
"refresh": 2,
"regex": "",
"skipUrlSync": false,
"sort": 2,
"type": "query"
},
{
"current": {
"isNone": true,
"selected": false,
"text": "None",
"value": ""
},
"datasource": {
"uid": "${datasource}"
},
"definition": "",
"hide": 0,
"includeAll": false,
"label": "namespace",
"multi": false,
"name": "namespace",
"options": [],
"query": {
"query": "label_values(alloy_component_controller_running_components{cluster=\"$cluster\"}, namespace)\n",
"refId": "namespace"
},
"refresh": 2,
"regex": "",
"skipUrlSync": false,
"sort": 2,
"type": "query"
},
{
"allValue": ".*",
"current": {
"selected": false,
"text": "All",
"value": "$__all"
},
"datasource": {
"uid": "${datasource}"
},
"definition": "",
"hide": 0,
"includeAll": true,
"label": "instance",
"multi": true,
"name": "instance",
"options": [],
"query": {
"query": "label_values(alloy_component_controller_running_components{cluster=\"$cluster\", namespace=\"$namespace\"}, instance)\n",
"refId": "instance"
},
"refresh": 2,
"regex": "",
"skipUrlSync": false,
"sort": 2,
"type": "query"
}
]
},
"time": {
"from": "now-1h",
"to": "now"
},
"timepicker": {
"refresh_intervals": [
"5s",
"10s",
"30s",
"1m",
"5m",
"15m",
"30m",
"1h",
"2h",
"1d"
],
"time_options": [
"5m",
"15m",
"1h",
"6h",
"12h",
"24h",
"2d",
"7d",
"30d",
"90d"
]
},
"timezone": "",
"title": "Alloy / OpenTelemetry",
"uid": "9b6d37c8603e19e8922133984faad93d",
"version": 0,
"weekStart": ""
}

File diff suppressed because it is too large Load Diff

View File

@ -1,840 +0,0 @@
{
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": {
"type": "grafana",
"uid": "-- Grafana --"
},
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"type": "dashboard"
},
{
"datasource": "$loki_datasource",
"enable": true,
"expr": "{cluster=\"$cluster\", container=\"kube-diff-logger\"} | json | namespace_extracted=\"alloy\" | name_extracted=~\"alloy.*\"",
"iconColor": "rgba(0, 211, 255, 1)",
"instant": false,
"name": "Deployments",
"titleFormat": "{{cluster}}/{{namespace}}"
}
]
},
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 1,
"id": 26,
"links": [
{
"asDropdown": true,
"icon": "external link",
"includeVars": true,
"keepTime": true,
"tags": [
"alloy-mixin"
],
"targetBlank": false,
"title": "Dashboards",
"type": "dashboards"
}
],
"panels": [
{
"datasource": "${datasource}",
"description": "CPU usage of the Alloy process relative to 1 CPU core.\n\nFor example, 100% means using one entire CPU core.\n",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "percentunit"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 0
},
"id": 1,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": "${datasource}",
"expr": "rate(alloy_resources_process_cpu_seconds_total{cluster=\"$cluster\",namespace=\"$namespace\",instance=~\"$instance\"}[$__rate_interval])",
"instant": false,
"legendFormat": "{{instance}}",
"range": true,
"refId": "A"
}
],
"title": "CPU usage",
"type": "timeseries"
},
{
"datasource": "${datasource}",
"description": "Resident memory size of the Alloy process.\n",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "decbytes"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 0
},
"id": 2,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": "${datasource}",
"expr": "alloy_resources_process_resident_memory_bytes{cluster=\"$cluster\",namespace=\"$namespace\",instance=~\"$instance\"}",
"instant": false,
"legendFormat": "{{instance}}",
"range": true,
"refId": "A"
}
],
"title": "Memory (RSS)",
"type": "timeseries"
},
{
"datasource": "${datasource}",
"description": "Rate at which the Alloy process performs garbage collections.\n",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "points",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 3,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "ops"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 8,
"x": 0,
"y": 8
},
"id": 3,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": "${datasource}",
"expr": "rate(go_gc_duration_seconds_count{cluster=\"$cluster\",namespace=\"$namespace\",instance=~\"$instance\"}[5m])\nand on(instance)\nalloy_build_info{cluster=\"$cluster\",namespace=\"$namespace\",instance=~\"$instance\"}\n",
"instant": false,
"legendFormat": "{{instance}}",
"range": true,
"refId": "A"
}
],
"title": "Garbage collections",
"type": "timeseries"
},
{
"datasource": "${datasource}",
"description": "Number of goroutines which are running in parallel. An infinitely\ngrowing number of these indicates a goroutine leak.\n",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "none"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 8,
"x": 8,
"y": 8
},
"id": 4,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": "${datasource}",
"expr": "go_goroutines{cluster=\"$cluster\",namespace=\"$namespace\",instance=~\"$instance\"}\nand on(instance)\nalloy_build_info{cluster=\"$cluster\",namespace=\"$namespace\",instance=~\"$instance\"}\n",
"instant": false,
"legendFormat": "{{instance}}",
"range": true,
"refId": "A"
}
],
"title": "Goroutines",
"type": "timeseries"
},
{
"datasource": "${datasource}",
"description": "Heap memory currently in use by the Alloy process.\n",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "decbytes"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 8,
"x": 16,
"y": 8
},
"id": 5,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": "${datasource}",
"expr": "go_memstats_heap_inuse_bytes{cluster=\"$cluster\",namespace=\"$namespace\",instance=~\"$instance\"}\nand on(instance)\nalloy_build_info{cluster=\"$cluster\",namespace=\"$namespace\",instance=~\"$instance\"}\n",
"instant": false,
"legendFormat": "{{instance}}",
"range": true,
"refId": "A"
}
],
"title": "Memory (heap inuse)",
"type": "timeseries"
},
{
"datasource": "${datasource}",
"description": "Rate of data received across all network interfaces for the machine\nAlloy is running on.\n\nData shown here is across all running processes and not exclusive to\nthe running Alloy process.\n",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 30,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "normal"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "Bps"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 16
},
"id": 6,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": "${datasource}",
"expr": "rate(alloy_resources_machine_rx_bytes_total{cluster=\"$cluster\",namespace=\"$namespace\",instance=~\"$instance\"}[$__rate_interval])\n",
"instant": false,
"legendFormat": "{{instance}}",
"range": true,
"refId": "A"
}
],
"title": "Network receive bandwidth",
"type": "timeseries"
},
{
"datasource": "${datasource}",
"description": "Rate of data sent across all network interfaces for the machine\nAlloy is running on.\n\nData shown here is across all running processes and not exclusive to\nthe running Alloy process.\n",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 30,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "normal"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "Bps"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 16
},
"id": 7,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": "${datasource}",
"expr": "rate(alloy_resources_machine_tx_bytes_total{cluster=\"$cluster\",namespace=\"$namespace\",instance=~\"$instance\"}[$__rate_interval])\n",
"instant": false,
"legendFormat": "{{instance}}",
"range": true,
"refId": "A"
}
],
"title": "Network send bandwidth",
"type": "timeseries"
}
],
"refresh": "10s",
"schemaVersion": 39,
"tags": [
"alloy-mixin"
],
"templating": {
"list": [
{
"current": {
"selected": false,
"text": "Mimir",
"value": "mimir_ds"
},
"hide": 0,
"includeAll": false,
"label": "Data Source",
"multi": false,
"name": "datasource",
"options": [],
"query": "prometheus",
"refresh": 1,
"regex": "",
"skipUrlSync": false,
"sort": 2,
"type": "datasource"
},
{
"current": {
"selected": false,
"text": "Loki",
"value": "loki_ds"
},
"hide": 0,
"includeAll": false,
"label": "Loki Data Source",
"multi": false,
"name": "loki_datasource",
"options": [],
"query": "loki",
"refresh": 1,
"regex": "",
"skipUrlSync": false,
"sort": 2,
"type": "datasource"
},
{
"current": {
"isNone": true,
"selected": false,
"text": "None",
"value": ""
},
"datasource": {
"uid": "${datasource}"
},
"definition": "",
"hide": 0,
"includeAll": false,
"label": "cluster",
"multi": false,
"name": "cluster",
"options": [],
"query": {
"query": "label_values(alloy_component_controller_running_components, cluster)\n",
"refId": "cluster"
},
"refresh": 2,
"regex": "",
"skipUrlSync": false,
"sort": 2,
"type": "query"
},
{
"current": {
"isNone": true,
"selected": false,
"text": "None",
"value": ""
},
"datasource": {
"uid": "${datasource}"
},
"definition": "",
"hide": 0,
"includeAll": false,
"label": "namespace",
"multi": false,
"name": "namespace",
"options": [],
"query": {
"query": "label_values(alloy_component_controller_running_components{cluster=\"$cluster\"}, namespace)\n",
"refId": "namespace"
},
"refresh": 2,
"regex": "",
"skipUrlSync": false,
"sort": 2,
"type": "query"
},
{
"allValue": ".*",
"current": {
"selected": false,
"text": "All",
"value": "$__all"
},
"datasource": {
"uid": "${datasource}"
},
"definition": "",
"hide": 0,
"includeAll": true,
"label": "instance",
"multi": true,
"name": "instance",
"options": [],
"query": {
"query": "label_values(alloy_component_controller_running_components{cluster=\"$cluster\", namespace=\"$namespace\"}, instance)\n",
"refId": "instance"
},
"refresh": 2,
"regex": "",
"skipUrlSync": false,
"sort": 2,
"type": "query"
}
]
},
"time": {
"from": "now-1h",
"to": "now"
},
"timepicker": {
"refresh_intervals": [
"5s",
"10s",
"30s",
"1m",
"5m",
"15m",
"30m",
"1h",
"2h",
"1d"
],
"time_options": [
"5m",
"15m",
"1h",
"6h",
"12h",
"24h",
"2d",
"7d",
"30d",
"90d"
]
},
"timezone": "",
"title": "Alloy / Resources",
"uid": "d6a8574c31f3d7cb8f1345ec84d15a67",
"version": 0,
"weekStart": ""
}

File diff suppressed because it is too large Load Diff

View File

@ -35,7 +35,6 @@
"fill": 1, "fill": 1,
"format": "none", "format": "none",
"id": 1, "id": 1,
"interval": "1m",
"legend": { "legend": {
"avg": false, "avg": false,
"current": false, "current": false,
@ -63,6 +62,7 @@
"expr": "sum(loki_compactor_pending_delete_requests_count{cluster=~\"$cluster\", namespace=~\"$namespace\"})", "expr": "sum(loki_compactor_pending_delete_requests_count{cluster=~\"$cluster\", namespace=~\"$namespace\"})",
"format": "time_series", "format": "time_series",
"instant": true, "instant": true,
"intervalFactor": 2,
"refId": "A" "refId": "A"
} }
], ],
@ -111,7 +111,6 @@
"fill": 1, "fill": 1,
"format": "dtdurations", "format": "dtdurations",
"id": 2, "id": 2,
"interval": "1m",
"legend": { "legend": {
"avg": false, "avg": false,
"current": false, "current": false,
@ -139,6 +138,7 @@
"expr": "max(loki_compactor_oldest_pending_delete_request_age_seconds{cluster=~\"$cluster\", namespace=~\"$namespace\"})", "expr": "max(loki_compactor_oldest_pending_delete_request_age_seconds{cluster=~\"$cluster\", namespace=~\"$namespace\"})",
"format": "time_series", "format": "time_series",
"instant": true, "instant": true,
"intervalFactor": 2,
"refId": "A" "refId": "A"
} }
], ],
@ -191,148 +191,232 @@
"height": "250px", "height": "250px",
"panels": [ "panels": [
{ {
"aliasColors": { },
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource", "datasource": "$datasource",
"fieldConfig": { "fill": 1,
"defaults": {
"custom": {
"drawStyle": "line",
"fillOpacity": 10,
"lineWidth": 1,
"pointSize": 5,
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
}
},
"thresholds": {
"mode": "absolute",
"steps": [ ]
},
"unit": "short"
},
"overrides": [ ]
},
"id": 3, "id": 3,
"interval": "1m",
"links": [ ],
"options": {
"legend": { "legend": {
"showLegend": true "avg": false,
}, "current": false,
"tooltip": { "max": false,
"mode": "single", "min": false,
"sort": "none" "show": true,
} "total": false,
"values": false
}, },
"lines": true,
"linewidth": 1,
"links": [ ],
"nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [ ],
"spaceLength": 10,
"span": 4, "span": 4,
"stack": false,
"steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "(loki_compactor_delete_requests_received_total{cluster=~\"$cluster\", namespace=~\"$namespace\"} or on() vector(0)) - on () (loki_compactor_delete_requests_processed_total{cluster=~\"$cluster\", namespace=~\"$namespace\"} or on () vector(0))", "expr": "(loki_compactor_delete_requests_received_total{cluster=~\"$cluster\", namespace=~\"$namespace\"} or on() vector(0)) - on () (loki_compactor_delete_requests_processed_total{cluster=~\"$cluster\", namespace=~\"$namespace\"} or on () vector(0))",
"format": "time_series", "format": "time_series",
"intervalFactor": 2,
"legendFormat": "in progress", "legendFormat": "in progress",
"legendLink": null "legendLink": null,
"step": 10
} }
], ],
"thresholds": [ ],
"timeFrom": null,
"timeShift": null,
"title": "# of Delete Requests (received - processed) ", "title": "# of Delete Requests (received - processed) ",
"type": "timeseries" "tooltip": {
"shared": true,
"sort": 2,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [ ]
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
}, },
{ {
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
}
]
},
{
"aliasColors": { },
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource", "datasource": "$datasource",
"fieldConfig": { "fill": 1,
"defaults": {
"custom": {
"drawStyle": "line",
"fillOpacity": 10,
"lineWidth": 1,
"pointSize": 5,
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
}
},
"thresholds": {
"mode": "absolute",
"steps": [ ]
},
"unit": "short"
},
"overrides": [ ]
},
"id": 4, "id": 4,
"interval": "1m",
"links": [ ],
"options": {
"legend": { "legend": {
"showLegend": true "avg": false,
}, "current": false,
"tooltip": { "max": false,
"mode": "single", "min": false,
"sort": "none" "show": true,
} "total": false,
"values": false
}, },
"lines": true,
"linewidth": 1,
"links": [ ],
"nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [ ],
"spaceLength": 10,
"span": 4, "span": 4,
"stack": false,
"steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "sum(increase(loki_compactor_delete_requests_received_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[1d]))", "expr": "sum(increase(loki_compactor_delete_requests_received_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[1d]))",
"format": "time_series", "format": "time_series",
"intervalFactor": 2,
"legendFormat": "received", "legendFormat": "received",
"legendLink": null "legendLink": null,
"step": 10
} }
], ],
"thresholds": [ ],
"timeFrom": null,
"timeShift": null,
"title": "Delete Requests Received / Day", "title": "Delete Requests Received / Day",
"type": "timeseries" "tooltip": {
"shared": true,
"sort": 2,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [ ]
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
}, },
{ {
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
}
]
},
{
"aliasColors": { },
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource", "datasource": "$datasource",
"fieldConfig": { "fill": 1,
"defaults": {
"custom": {
"drawStyle": "line",
"fillOpacity": 10,
"lineWidth": 1,
"pointSize": 5,
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
}
},
"thresholds": {
"mode": "absolute",
"steps": [ ]
},
"unit": "short"
},
"overrides": [ ]
},
"id": 5, "id": 5,
"interval": "1m",
"links": [ ],
"options": {
"legend": { "legend": {
"showLegend": true "avg": false,
}, "current": false,
"tooltip": { "max": false,
"mode": "single", "min": false,
"sort": "none" "show": true,
} "total": false,
"values": false
}, },
"lines": true,
"linewidth": 1,
"links": [ ],
"nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [ ],
"spaceLength": 10,
"span": 4, "span": 4,
"stack": false,
"steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "sum(increase(loki_compactor_delete_requests_processed_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[1d]))", "expr": "sum(increase(loki_compactor_delete_requests_processed_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[1d]))",
"format": "time_series", "format": "time_series",
"intervalFactor": 2,
"legendFormat": "processed", "legendFormat": "processed",
"legendLink": null "legendLink": null,
"step": 10
} }
], ],
"thresholds": [ ],
"timeFrom": null,
"timeShift": null,
"title": "Delete Requests Processed / Day", "title": "Delete Requests Processed / Day",
"type": "timeseries" "tooltip": {
"shared": true,
"sort": 2,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [ ]
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
}
]
} }
], ],
"repeat": null, "repeat": null,
@ -347,148 +431,232 @@
"height": "250px", "height": "250px",
"panels": [ "panels": [
{ {
"aliasColors": { },
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource", "datasource": "$datasource",
"fieldConfig": { "fill": 1,
"defaults": {
"custom": {
"drawStyle": "line",
"fillOpacity": 10,
"lineWidth": 1,
"pointSize": 5,
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
}
},
"thresholds": {
"mode": "absolute",
"steps": [ ]
},
"unit": "short"
},
"overrides": [ ]
},
"id": 6, "id": 6,
"interval": "1m",
"links": [ ],
"options": {
"legend": { "legend": {
"showLegend": true "avg": false,
}, "current": false,
"tooltip": { "max": false,
"mode": "single", "min": false,
"sort": "none" "show": true,
} "total": false,
"values": false
}, },
"lines": true,
"linewidth": 1,
"links": [ ],
"nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [ ],
"spaceLength": 10,
"span": 4, "span": 4,
"stack": false,
"steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"compactor\"}", "expr": "node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"compactor\"}",
"format": "time_series", "format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}}", "legendFormat": "{{pod}}",
"legendLink": null "legendLink": null,
"step": 10
} }
], ],
"thresholds": [ ],
"timeFrom": null,
"timeShift": null,
"title": "Compactor CPU usage", "title": "Compactor CPU usage",
"type": "timeseries" "tooltip": {
"shared": true,
"sort": 2,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [ ]
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
}, },
{ {
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
}
]
},
{
"aliasColors": { },
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource", "datasource": "$datasource",
"fieldConfig": { "fill": 1,
"defaults": {
"custom": {
"drawStyle": "line",
"fillOpacity": 10,
"lineWidth": 1,
"pointSize": 5,
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
}
},
"thresholds": {
"mode": "absolute",
"steps": [ ]
},
"unit": "short"
},
"overrides": [ ]
},
"id": 7, "id": 7,
"interval": "1m",
"links": [ ],
"options": {
"legend": { "legend": {
"showLegend": true "avg": false,
}, "current": false,
"tooltip": { "max": false,
"mode": "single", "min": false,
"sort": "none" "show": true,
} "total": false,
"values": false
}, },
"lines": true,
"linewidth": 1,
"links": [ ],
"nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [ ],
"spaceLength": 10,
"span": 4, "span": 4,
"stack": false,
"steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"compactor\"} / 1024 / 1024 ", "expr": "go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"compactor\"} / 1024 / 1024 ",
"format": "time_series", "format": "time_series",
"intervalFactor": 2,
"legendFormat": " {{pod}} ", "legendFormat": " {{pod}} ",
"legendLink": null "legendLink": null,
"step": 10
} }
], ],
"thresholds": [ ],
"timeFrom": null,
"timeShift": null,
"title": "Compactor memory usage (MiB)", "title": "Compactor memory usage (MiB)",
"type": "timeseries" "tooltip": {
"shared": true,
"sort": 2,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [ ]
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
}, },
{ {
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
}
]
},
{
"aliasColors": { },
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource", "datasource": "$datasource",
"fieldConfig": { "fill": 1,
"defaults": {
"custom": {
"drawStyle": "line",
"fillOpacity": 10,
"lineWidth": 1,
"pointSize": 5,
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
}
},
"thresholds": {
"mode": "absolute",
"steps": [ ]
},
"unit": "short"
},
"overrides": [ ]
},
"id": 8, "id": 8,
"interval": "1m",
"links": [ ],
"options": {
"legend": { "legend": {
"showLegend": true "avg": false,
}, "current": false,
"tooltip": { "max": false,
"mode": "single", "min": false,
"sort": "none" "show": true,
} "total": false,
"values": false
}, },
"lines": true,
"linewidth": 1,
"links": [ ],
"nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [ ],
"spaceLength": 10,
"span": 4, "span": 4,
"stack": false,
"steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "loki_boltdb_shipper_compact_tables_operation_duration_seconds{cluster=~\"$cluster\", namespace=~\"$namespace\"}", "expr": "loki_boltdb_shipper_compact_tables_operation_duration_seconds{cluster=~\"$cluster\", namespace=~\"$namespace\"}",
"format": "time_series", "format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}}", "legendFormat": "{{pod}}",
"legendLink": null "legendLink": null,
"step": 10
} }
], ],
"thresholds": [ ],
"timeFrom": null,
"timeShift": null,
"title": "Compaction run duration (seconds)", "title": "Compaction run duration (seconds)",
"type": "timeseries" "tooltip": {
"shared": true,
"sort": 2,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [ ]
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
}
]
} }
], ],
"repeat": null, "repeat": null,
@ -503,100 +671,156 @@
"height": "250px", "height": "250px",
"panels": [ "panels": [
{ {
"aliasColors": { },
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource", "datasource": "$datasource",
"fieldConfig": { "fill": 1,
"defaults": {
"custom": {
"drawStyle": "line",
"fillOpacity": 10,
"lineWidth": 1,
"pointSize": 5,
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
}
},
"thresholds": {
"mode": "absolute",
"steps": [ ]
},
"unit": "short"
},
"overrides": [ ]
},
"id": 9, "id": 9,
"interval": "1m",
"links": [ ],
"options": {
"legend": { "legend": {
"showLegend": true "avg": false,
}, "current": false,
"tooltip": { "max": false,
"mode": "single", "min": false,
"sort": "none" "show": true,
} "total": false,
"values": false
}, },
"lines": true,
"linewidth": 1,
"links": [ ],
"nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [ ],
"spaceLength": 10,
"span": 6, "span": 6,
"stack": false,
"steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "sum(increase(loki_compactor_load_pending_requests_attempts_total{status=\"fail\", cluster=~\"$cluster\", namespace=~\"$namespace\"}[1h]))", "expr": "sum(increase(loki_compactor_load_pending_requests_attempts_total{status=\"fail\", cluster=~\"$cluster\", namespace=~\"$namespace\"}[1h]))",
"format": "time_series", "format": "time_series",
"intervalFactor": 2,
"legendFormat": "failures", "legendFormat": "failures",
"legendLink": null "legendLink": null,
"step": 10
} }
], ],
"thresholds": [ ],
"timeFrom": null,
"timeShift": null,
"title": "Failures in Loading Delete Requests / Hour", "title": "Failures in Loading Delete Requests / Hour",
"type": "timeseries" "tooltip": {
"shared": true,
"sort": 2,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [ ]
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
}, },
{ {
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
}
]
},
{
"aliasColors": { },
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource", "datasource": "$datasource",
"fieldConfig": { "fill": 1,
"defaults": {
"custom": {
"drawStyle": "line",
"fillOpacity": 10,
"lineWidth": 1,
"pointSize": 5,
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
}
},
"thresholds": {
"mode": "absolute",
"steps": [ ]
},
"unit": "short"
},
"overrides": [ ]
},
"id": 10, "id": 10,
"interval": "1m",
"links": [ ],
"options": {
"legend": { "legend": {
"showLegend": true "avg": false,
}, "current": false,
"tooltip": { "max": false,
"mode": "single", "min": false,
"sort": "none" "show": true,
} "total": false,
"values": false
}, },
"lines": true,
"linewidth": 1,
"links": [ ],
"nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [ ],
"spaceLength": 10,
"span": 6, "span": 6,
"stack": false,
"steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "sum(rate(loki_compactor_deleted_lines{cluster=~\"$cluster\", namespace=~\"$namespace\", pod=~\"(.*/compactor|(loki|enterprise-logs)-backend.*|loki-single-binary)\"}[$__rate_interval])) by (user)", "expr": "sum(rate(loki_compactor_deleted_lines{cluster=~\"$cluster\",job=~\"$namespace/(loki|enterprise-logs)-read\"}[$__rate_interval])) by (user)",
"format": "time_series", "format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{user}}", "legendFormat": "{{user}}",
"legendLink": null "legendLink": null,
"step": 10
} }
], ],
"thresholds": [ ],
"timeFrom": null,
"timeShift": null,
"title": "Lines Deleted / Sec", "title": "Lines Deleted / Sec",
"type": "timeseries" "tooltip": {
"shared": true,
"sort": 2,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [ ]
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
}
]
} }
], ],
"repeat": null, "repeat": null,
@ -613,11 +837,10 @@
{ {
"datasource": "$loki_datasource", "datasource": "$loki_datasource",
"id": 11, "id": 11,
"interval": "1m",
"span": 6, "span": 6,
"targets": [ "targets": [
{ {
"expr": "{cluster=~\"$cluster\", namespace=~\"$namespace\", pod=~\"(.*/compactor|(loki|enterprise-logs)-backend.*|loki-single-binary)\"} |~ \"Started processing delete request|delete request for user marked as processed\" | logfmt | line_format \"{{.ts}} user={{.user}} delete_request_id={{.delete_request_id}} msg={{.msg}}\" ", "expr": "{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"compactor\"} |~ \"Started processing delete request|delete request for user marked as processed\" | logfmt | line_format \"{{.ts}} user={{.user}} delete_request_id={{.delete_request_id}} msg={{.msg}}\" ",
"refId": "A" "refId": "A"
} }
], ],
@ -627,11 +850,10 @@
{ {
"datasource": "$loki_datasource", "datasource": "$loki_datasource",
"id": 12, "id": 12,
"interval": "1m",
"span": 6, "span": 6,
"targets": [ "targets": [
{ {
"expr": "{cluster=~\"$cluster\", namespace=~\"$namespace\", pod=~\"(.*/compactor|(loki|enterprise-logs)-backend.*|loki-single-binary)\"} |~ \"delete request for user added\" | logfmt | line_format \"{{.ts}} user={{.user}} query='{{.query}}'\"", "expr": "{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"compactor\"} |~ \"delete request for user added\" | logfmt | line_format \"{{.ts}} user={{.user}} query='{{.query}}'\"",
"refId": "A" "refId": "A"
} }
], ],
@ -660,7 +882,7 @@
"value": "default" "value": "default"
}, },
"hide": 0, "hide": 0,
"label": "Data source", "label": "Data Source",
"name": "datasource", "name": "datasource",
"options": [ ], "options": [ ],
"query": "prometheus", "query": "prometheus",
@ -713,16 +935,6 @@
"tagsQuery": "", "tagsQuery": "",
"type": "query", "type": "query",
"useTags": false "useTags": false
},
{
"hide": 0,
"label": null,
"name": "loki_datasource",
"options": [ ],
"query": "loki",
"refresh": 1,
"regex": "",
"type": "datasource"
} }
] ]
}, },

View File

@ -38,7 +38,6 @@
}, },
"hiddenSeries": false, "hiddenSeries": false,
"id": 35, "id": 35,
"interval": "1m",
"legend": { "legend": {
"avg": false, "avg": false,
"current": false, "current": false,
@ -78,7 +77,7 @@
"sort": 0, "sort": 0,
"value_type": "individual" "value_type": "individual"
}, },
"type": "timeseries", "type": "graph",
"xaxis": { "xaxis": {
"buckets": null, "buckets": null,
"mode": "time", "mode": "time",
@ -115,11 +114,6 @@
"dashLength": 10, "dashLength": 10,
"dashes": false, "dashes": false,
"datasource": "$datasource", "datasource": "$datasource",
"fieldConfig": {
"defaults": {
"unit": "s"
}
},
"fill": 1, "fill": 1,
"fillGradient": 0, "fillGradient": 0,
"gridPos": { "gridPos": {
@ -130,7 +124,6 @@
}, },
"hiddenSeries": false, "hiddenSeries": false,
"id": 41, "id": 41,
"interval": "1m",
"legend": { "legend": {
"avg": false, "avg": false,
"current": false, "current": false,
@ -171,7 +164,7 @@
"sort": 0, "sort": 0,
"value_type": "individual" "value_type": "individual"
}, },
"type": "timeseries", "type": "graph",
"xaxis": { "xaxis": {
"buckets": null, "buckets": null,
"mode": "time", "mode": "time",
@ -218,7 +211,6 @@
}, },
"hiddenSeries": false, "hiddenSeries": false,
"id": 36, "id": 36,
"interval": "1m",
"legend": { "legend": {
"avg": false, "avg": false,
"current": false, "current": false,
@ -244,7 +236,7 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "sum(rate(container_cpu_usage_seconds_total{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$deployment.*\", pod=~\"$pod\", container=~\"$container\"}[$__rate_interval]))", "expr": "sum(rate(container_cpu_usage_seconds_total{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$deployment.*\", pod=~\"$pod\", container=~\"$container\"}[5m]))",
"refId": "A" "refId": "A"
} }
], ],
@ -258,7 +250,7 @@
"sort": 0, "sort": 0,
"value_type": "individual" "value_type": "individual"
}, },
"type": "timeseries", "type": "graph",
"xaxis": { "xaxis": {
"buckets": null, "buckets": null,
"mode": "time", "mode": "time",
@ -295,11 +287,6 @@
"dashLength": 10, "dashLength": 10,
"dashes": false, "dashes": false,
"datasource": "$datasource", "datasource": "$datasource",
"fieldConfig": {
"defaults": {
"unit": "bytes"
}
},
"fill": 1, "fill": 1,
"fillGradient": 0, "fillGradient": 0,
"gridPos": { "gridPos": {
@ -310,7 +297,6 @@
}, },
"hiddenSeries": false, "hiddenSeries": false,
"id": 40, "id": 40,
"interval": "1m",
"legend": { "legend": {
"avg": false, "avg": false,
"current": false, "current": false,
@ -350,7 +336,7 @@
"sort": 0, "sort": 0,
"value_type": "individual" "value_type": "individual"
}, },
"type": "timeseries", "type": "graph",
"xaxis": { "xaxis": {
"buckets": null, "buckets": null,
"mode": "time", "mode": "time",
@ -387,11 +373,6 @@
"dashLength": 10, "dashLength": 10,
"dashes": false, "dashes": false,
"datasource": "$datasource", "datasource": "$datasource",
"fieldConfig": {
"defaults": {
"unit": "binBps"
}
},
"fill": 1, "fill": 1,
"fillGradient": 0, "fillGradient": 0,
"gridPos": { "gridPos": {
@ -402,7 +383,6 @@
}, },
"hiddenSeries": false, "hiddenSeries": false,
"id": 38, "id": 38,
"interval": "1m",
"legend": { "legend": {
"avg": false, "avg": false,
"current": false, "current": false,
@ -428,7 +408,7 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "sum(rate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$deployment.*\", pod=~\"$pod\"}[$__rate_interval]))", "expr": "sum(rate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$deployment.*\", pod=~\"$pod\"}[5m]))",
"refId": "A" "refId": "A"
} }
], ],
@ -442,7 +422,7 @@
"sort": 0, "sort": 0,
"value_type": "individual" "value_type": "individual"
}, },
"type": "timeseries", "type": "graph",
"xaxis": { "xaxis": {
"buckets": null, "buckets": null,
"mode": "time", "mode": "time",
@ -479,11 +459,6 @@
"dashLength": 10, "dashLength": 10,
"dashes": false, "dashes": false,
"datasource": "$datasource", "datasource": "$datasource",
"fieldConfig": {
"defaults": {
"unit": "binBps"
}
},
"fill": 1, "fill": 1,
"fillGradient": 0, "fillGradient": 0,
"gridPos": { "gridPos": {
@ -494,7 +469,6 @@
}, },
"hiddenSeries": false, "hiddenSeries": false,
"id": 39, "id": 39,
"interval": "1m",
"legend": { "legend": {
"avg": false, "avg": false,
"current": false, "current": false,
@ -520,7 +494,7 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "sum(rate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$deployment.*\", pod=~\"$pod\"}[$__rate_interval]))", "expr": "sum(rate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$deployment.*\", pod=~\"$pod\"}[5m]))",
"refId": "A" "refId": "A"
} }
], ],
@ -534,7 +508,7 @@
"sort": 0, "sort": 0,
"value_type": "individual" "value_type": "individual"
}, },
"type": "timeseries", "type": "graph",
"xaxis": { "xaxis": {
"buckets": null, "buckets": null,
"mode": "time", "mode": "time",
@ -581,7 +555,6 @@
}, },
"hiddenSeries": false, "hiddenSeries": false,
"id": 37, "id": 37,
"interval": "1m",
"legend": { "legend": {
"avg": false, "avg": false,
"current": false, "current": false,
@ -622,7 +595,7 @@
"sort": 0, "sort": 0,
"value_type": "individual" "value_type": "individual"
}, },
"type": "timeseries", "type": "graph",
"xaxis": { "xaxis": {
"buckets": null, "buckets": null,
"mode": "time", "mode": "time",
@ -659,11 +632,6 @@
"dashLength": 10, "dashLength": 10,
"dashes": false, "dashes": false,
"datasource": "$datasource", "datasource": "$datasource",
"fieldConfig": {
"defaults": {
"unit": "ops"
}
},
"fill": 1, "fill": 1,
"fillGradient": 0, "fillGradient": 0,
"gridPos": { "gridPos": {
@ -674,7 +642,6 @@
}, },
"hiddenSeries": false, "hiddenSeries": false,
"id": 42, "id": 42,
"interval": "1m",
"legend": { "legend": {
"avg": false, "avg": false,
"current": false, "current": false,
@ -700,7 +667,7 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "sum(rate(promtail_custom_bad_words_total{cluster=\"$cluster\", exported_namespace=\"$namespace\", exported_pod=~\"$deployment.*\", exported_pod=~\"$pod\", container=~\"$container\"}[$__rate_interval])) by (level)", "expr": "sum(rate(promtail_custom_bad_words_total{cluster=\"$cluster\", exported_namespace=\"$namespace\", exported_pod=~\"$deployment.*\", exported_pod=~\"$pod\", container=~\"$container\"}[5m])) by (level)",
"legendFormat": "{{level}}", "legendFormat": "{{level}}",
"refId": "A" "refId": "A"
} }
@ -715,7 +682,7 @@
"sort": 0, "sort": 0,
"value_type": "individual" "value_type": "individual"
}, },
"type": "timeseries", "type": "graph",
"xaxis": { "xaxis": {
"buckets": null, "buckets": null,
"mode": "time", "mode": "time",
@ -751,12 +718,7 @@
"bars": false, "bars": false,
"dashLength": 10, "dashLength": 10,
"dashes": false, "dashes": false,
"datasource": "$loki_datasource", "datasource": "$logs",
"fieldConfig": {
"defaults": {
"unit": "ops"
}
},
"fill": 1, "fill": 1,
"fillGradient": 0, "fillGradient": 0,
"gridPos": { "gridPos": {
@ -767,7 +729,6 @@
}, },
"hiddenSeries": false, "hiddenSeries": false,
"id": 31, "id": 31,
"interval": "1m",
"legend": { "legend": {
"avg": false, "avg": false,
"current": false, "current": false,
@ -810,7 +771,7 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "sum(rate({cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$deployment.*\", pod=~\"$pod\", container=~\"$container\" } |logfmt| level=\"$level\" |= \"$filter\" | __error__=\"\" [$__interval])) by (level)", "expr": "sum(rate({cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$deployment.*\", pod=~\"$pod\", container=~\"$container\" } |logfmt| level=\"$level\" |= \"$filter\" [5m])) by (level)",
"intervalFactor": 3, "intervalFactor": 3,
"legendFormat": "{{level}}", "legendFormat": "{{level}}",
"refId": "A" "refId": "A"
@ -826,7 +787,7 @@
"sort": 2, "sort": 2,
"value_type": "individual" "value_type": "individual"
}, },
"type": "timeseries", "type": "graph",
"xaxis": { "xaxis": {
"buckets": null, "buckets": null,
"mode": "time", "mode": "time",
@ -858,7 +819,7 @@
} }
}, },
{ {
"datasource": "$loki_datasource", "datasource": "$logs",
"gridPos": { "gridPos": {
"h": 19, "h": 19,
"w": 24, "w": 24,
@ -866,7 +827,6 @@
"y": 6 "y": 6
}, },
"id": 29, "id": 29,
"interval": "1m",
"maxDataPoints": "", "maxDataPoints": "",
"options": { "options": {
"showLabels": false, "showLabels": false,
@ -901,7 +861,7 @@
"value": "default" "value": "default"
}, },
"hide": 0, "hide": 0,
"label": "Data source", "label": "Data Source",
"name": "datasource", "name": "datasource",
"options": [ ], "options": [ ],
"query": "prometheus", "query": "prometheus",
@ -958,7 +918,7 @@
{ {
"hide": 0, "hide": 0,
"label": null, "label": null,
"name": "loki_datasource", "name": "logs",
"options": [ ], "options": [ ],
"query": "loki", "query": "loki",
"refresh": 1, "refresh": 1,

View File

@ -0,0 +1,723 @@
{
"annotations": {
"list": [ ]
},
"editable": true,
"fiscalYearStartMonth": 0,
"gnetId": null,
"graphTooltip": 0,
"hideControls": false,
"iteration": 1635347545534,
"links": [
{
"asDropdown": true,
"icon": "external link",
"includeVars": true,
"keepTime": true,
"tags": [
"loki"
],
"targetBlank": false,
"title": "Loki Dashboards",
"type": "dashboards"
}
],
"liveNow": false,
"panels": [
{
"datasource": "${datasource}",
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"mappings": [ ],
"noValue": "0",
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 1
}
]
}
},
"overrides": [ ]
},
"gridPos": {
"h": 10,
"w": 2,
"x": 0,
"y": 0
},
"id": 2,
"options": {
"colorMode": "value",
"graphMode": "area",
"justifyMode": "auto",
"orientation": "auto",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"textMode": "auto"
},
"pluginVersion": "8.3.0-38205pre",
"targets": [
{
"datasource": "${datasource}",
"exemplar": false,
"expr": "sum(loki_ruler_wal_appender_ready) by (pod, tenant) == 0",
"instant": true,
"interval": "",
"legendFormat": "",
"refId": "A"
}
],
"title": "Appenders Not Ready",
"type": "stat"
},
{
"datasource": "${datasource}",
"description": "",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [ ],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": [ ]
},
"gridPos": {
"h": 10,
"w": 11,
"x": 2,
"y": 0
},
"id": 4,
"options": {
"legend": {
"calcs": [ ],
"displayMode": "list",
"placement": "bottom"
},
"tooltip": {
"mode": "single"
}
},
"targets": [
{
"datasource": "${datasource}",
"exemplar": true,
"expr": "sum(rate(loki_ruler_wal_samples_appended_total{tenant=~\"${tenant}\"}[$__rate_interval])) by (tenant) > 0",
"interval": "",
"legendFormat": "{{tenant}}",
"refId": "A"
}
],
"title": "Samples Appended to WAL per Second",
"type": "timeseries"
},
{
"datasource": "${datasource}",
"description": "Series are unique combinations of labels",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [ ],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": [ ]
},
"gridPos": {
"h": 10,
"w": 11,
"x": 13,
"y": 0
},
"id": 5,
"options": {
"legend": {
"calcs": [ ],
"displayMode": "list",
"placement": "bottom"
},
"tooltip": {
"mode": "single"
}
},
"targets": [
{
"datasource": "${datasource}",
"exemplar": true,
"expr": "sum(rate(loki_ruler_wal_storage_created_series_total{tenant=~\"${tenant}\"}[$__rate_interval])) by (tenant) > 0",
"interval": "",
"legendFormat": "{{tenant}}",
"refId": "A"
}
],
"title": "Series Created per Second",
"type": "timeseries"
},
{
"datasource": "${datasource}",
"description": "Difference between highest timestamp appended to WAL and highest timestamp successfully written to remote storage",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [ ],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": [ ]
},
"gridPos": {
"h": 10,
"w": 12,
"x": 0,
"y": 10
},
"id": 6,
"options": {
"legend": {
"calcs": [ ],
"displayMode": "list",
"placement": "bottom"
},
"tooltip": {
"mode": "single"
}
},
"targets": [
{
"datasource": "${datasource}",
"exemplar": true,
"expr": "loki_ruler_wal_prometheus_remote_storage_highest_timestamp_in_seconds{tenant=~\"${tenant}\"}\n- on (tenant)\n (\n loki_ruler_wal_prometheus_remote_storage_queue_highest_sent_timestamp_seconds{tenant=~\"${tenant}\"}\n or vector(0)\n )",
"interval": "",
"legendFormat": "{{tenant}}",
"refId": "A"
}
],
"title": "Write Behind",
"type": "timeseries"
},
{
"datasource": "${datasource}",
"description": "",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [ ],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": [ ]
},
"gridPos": {
"h": 10,
"w": 12,
"x": 12,
"y": 10
},
"id": 7,
"options": {
"legend": {
"calcs": [ ],
"displayMode": "list",
"placement": "bottom"
},
"tooltip": {
"mode": "single"
}
},
"targets": [
{
"datasource": "${datasource}",
"exemplar": true,
"expr": "sum(rate(loki_ruler_wal_prometheus_remote_storage_samples_total{tenant=~\"${tenant}\"}[$__rate_interval])) by (tenant) > 0",
"interval": "",
"legendFormat": "{{tenant}}",
"refId": "A"
}
],
"title": "Samples Sent per Second",
"type": "timeseries"
},
{
"datasource": "${datasource}",
"description": "\n",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [ ],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "bytes"
},
"overrides": [ ]
},
"gridPos": {
"h": 10,
"w": 12,
"x": 0,
"y": 20
},
"id": 8,
"options": {
"legend": {
"calcs": [ ],
"displayMode": "list",
"placement": "bottom"
},
"tooltip": {
"mode": "single"
}
},
"targets": [
{
"datasource": "${datasource}",
"exemplar": true,
"expr": "sum by (tenant) (loki_ruler_wal_disk_size{tenant=~\"${tenant}\"})",
"interval": "",
"legendFormat": "{{tenant}}",
"refId": "A"
}
],
"title": "WAL Disk Size",
"type": "timeseries"
},
{
"datasource": "${datasource}",
"description": "Some number of pending samples is expected, but if remote-write is failing this value will remain high",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [ ],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": [ ]
},
"gridPos": {
"h": 10,
"w": 12,
"x": 12,
"y": 20
},
"id": 9,
"options": {
"legend": {
"calcs": [ ],
"displayMode": "list",
"placement": "bottom"
},
"tooltip": {
"mode": "single"
}
},
"targets": [
{
"datasource": "${datasource}",
"exemplar": true,
"expr": "max(loki_ruler_wal_prometheus_remote_storage_samples_pending{tenant=~\"${tenant}\"}) by (tenant,pod) > 0",
"interval": "",
"legendFormat": "{{tenant}}",
"refId": "A"
}
],
"title": "Pending Samples",
"type": "timeseries"
}
],
"refresh": "10s",
"rows": [ ],
"schemaVersion": 14,
"style": "dark",
"tags": [
"loki"
],
"templating": {
"list": [
{
"current": {
"text": "default",
"value": "default"
},
"hide": 0,
"label": "Data Source",
"name": "datasource",
"options": [ ],
"query": "prometheus",
"refresh": 1,
"regex": "",
"type": "datasource"
},
{
"allValue": null,
"current": {
"text": "prod",
"value": "prod"
},
"datasource": "$datasource",
"hide": 0,
"includeAll": false,
"label": "cluster",
"multi": false,
"name": "cluster",
"options": [ ],
"query": "label_values(loki_build_info, cluster)",
"refresh": 1,
"regex": "",
"sort": 2,
"tagValuesQuery": "",
"tags": [ ],
"tagsQuery": "",
"type": "query",
"useTags": false
},
{
"allValue": null,
"current": {
"text": "prod",
"value": "prod"
},
"datasource": "$datasource",
"hide": 0,
"includeAll": false,
"label": "namespace",
"multi": false,
"name": "namespace",
"options": [ ],
"query": "label_values(loki_build_info{cluster=~\"$cluster\"}, namespace)",
"refresh": 1,
"regex": "",
"sort": 2,
"tagValuesQuery": "",
"tags": [ ],
"tagsQuery": "",
"type": "query",
"useTags": false
},
{
"hide": 0,
"label": null,
"name": "loki_datasource",
"options": [ ],
"query": "loki",
"refresh": 1,
"regex": "",
"type": "datasource"
},
{
"allValue": ".+",
"current": { },
"datasource": "$datasource",
"hide": 0,
"includeAll": true,
"label": null,
"multi": false,
"name": "tenant",
"options": [ ],
"query": "query_result(sum by (id) (grafanacloud_logs_instance_info) and sum(label_replace(loki_tenant:active_streams{cluster=\"$cluster\",namespace=\"$namespace\"},\"id\",\"$1\",\"tenant\",\"(.*)\")) by(id))",
"refresh": 0,
"regex": "/\"([^\"]+)\"/",
"sort": 1,
"tagValuesQuery": "",
"tags": [ ],
"tagsQuery": "",
"type": "query",
"useTags": false
}
]
},
"time": {
"from": "now-1h",
"to": "now"
},
"timepicker": {
"refresh_intervals": [
"5s",
"10s",
"30s",
"1m",
"5m",
"15m",
"30m",
"1h",
"2h",
"1d"
],
"time_options": [
"5m",
"15m",
"1h",
"6h",
"12h",
"24h",
"2d",
"7d",
"30d"
]
},
"timezone": "utc",
"title": "Loki / Recording Rules",
"uid": "recording-rules",
"version": 0,
"weekStart": ""
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,836 @@
{
"__requires": [
{
"id": "grafana",
"name": "Grafana",
"type": "grafana",
"version": "8.0.0"
}
],
"annotations": {
"list": [ ]
},
"editable": true,
"gnetId": null,
"graphTooltip": 1,
"hideControls": false,
"links": [
{
"asDropdown": true,
"icon": "external link",
"includeVars": true,
"keepTime": true,
"tags": [
"mimir"
],
"targetBlank": false,
"title": "Mimir dashboards",
"type": "dashboards"
}
],
"refresh": "10s",
"rows": [
{
"collapse": false,
"height": "250px",
"panels": [
{
"aliasColors": { },
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 0,
"id": 1,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [ ],
"nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [
{
"alias": "request",
"color": "#FFC000",
"dashLength": 5,
"dashes": true,
"fill": 0
},
{
"alias": "limit",
"color": "#E02F44",
"dashLength": 5,
"dashes": true,
"fill": 0
}
],
"spaceLength": 10,
"span": 4,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"alertmanager\"}[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}}",
"legendLink": null
},
{
"expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"alertmanager\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"alertmanager\"})",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "limit",
"legendLink": null
},
{
"expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"alertmanager\",resource=\"cpu\"})",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "request",
"legendLink": null
}
],
"thresholds": [ ],
"timeFrom": null,
"timeShift": null,
"title": "CPU",
"tooltip": {
"sort": 2
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [ ]
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
}
]
},
{
"aliasColors": { },
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 0,
"id": 2,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [ ],
"nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [
{
"alias": "request",
"color": "#FFC000",
"dashLength": 5,
"dashes": true,
"fill": 0
},
{
"alias": "limit",
"color": "#E02F44",
"dashLength": 5,
"dashes": true,
"fill": 0
}
],
"spaceLength": 10,
"span": 4,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"alertmanager\"})",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}}",
"legendLink": null
},
{
"expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"alertmanager\"} > 0)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "limit",
"legendLink": null
},
{
"expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"alertmanager\",resource=\"memory\"})",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "request",
"legendLink": null
}
],
"thresholds": [ ],
"timeFrom": null,
"timeShift": null,
"title": "Memory (workingset)",
"tooltip": {
"sort": 2
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [ ]
},
"yaxes": [
{
"format": "bytes",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
}
]
},
{
"aliasColors": { },
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 0,
"id": 3,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [ ],
"nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [ ],
"spaceLength": 10,
"span": 4,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"alertmanager\"})",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}}",
"legendLink": null
}
],
"thresholds": [ ],
"timeFrom": null,
"timeShift": null,
"title": "Memory (go heap inuse)",
"tooltip": {
"sort": 2
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [ ]
},
"yaxes": [
{
"format": "bytes",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
}
]
}
],
"repeat": null,
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
"title": "Alertmanager",
"titleSize": "h6"
},
{
"collapse": false,
"height": "250px",
"panels": [
{
"aliasColors": { },
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 10,
"id": 4,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 0,
"links": [ ],
"nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [ ],
"spaceLength": 10,
"span": 6,
"stack": true,
"steppedLine": false,
"targets": [
{
"expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?alertmanager.*\"}[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}}",
"legendLink": null
}
],
"thresholds": [ ],
"timeFrom": null,
"timeShift": null,
"title": "Receive bandwidth",
"tooltip": {
"shared": false,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [ ]
},
"yaxes": [
{
"format": "Bps",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
}
]
},
{
"aliasColors": { },
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 10,
"id": 5,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 0,
"links": [ ],
"nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [ ],
"spaceLength": 10,
"span": 6,
"stack": true,
"steppedLine": false,
"targets": [
{
"expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?alertmanager.*\"}[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}}",
"legendLink": null
}
],
"thresholds": [ ],
"timeFrom": null,
"timeShift": null,
"title": "Transmit bandwidth",
"tooltip": {
"shared": false,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [ ]
},
"yaxes": [
{
"format": "Bps",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
}
]
}
],
"repeat": null,
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
"title": "Network",
"titleSize": "h6"
},
{
"collapse": false,
"height": "250px",
"panels": [
{
"aliasColors": { },
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 10,
"id": 6,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 0,
"links": [ ],
"nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [ ],
"spaceLength": 10,
"span": 6,
"stack": true,
"steppedLine": false,
"targets": [
{
"expr": "sum by(instance, pod, device) (\n rate(\n node_disk_written_bytes_total[$__rate_interval]\n )\n)\n+\nignoring(pod) group_right() (\n label_replace(\n count by(\n instance,\n pod,\n device\n )\n (\n container_fs_writes_bytes_total{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n container=~\"alertmanager\",\n device!~\".*sda.*\"\n }\n ),\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ) * 0\n)\n\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}} - {{device}}",
"legendLink": null
}
],
"thresholds": [ ],
"timeFrom": null,
"timeShift": null,
"title": "Disk writes",
"tooltip": {
"shared": false,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [ ]
},
"yaxes": [
{
"format": "Bps",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
}
]
},
{
"aliasColors": { },
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 10,
"id": 7,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 0,
"links": [ ],
"nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [ ],
"spaceLength": 10,
"span": 6,
"stack": true,
"steppedLine": false,
"targets": [
{
"expr": "sum by(instance, pod, device) (\n rate(\n node_disk_read_bytes_total[$__rate_interval]\n )\n) + ignoring(pod) group_right() (\n label_replace(\n count by(\n instance,\n pod,\n device\n )\n (\n container_fs_writes_bytes_total{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n container=~\"alertmanager\",\n device!~\".*sda.*\"\n }\n ),\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ) * 0\n)\n\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}} - {{device}}",
"legendLink": null
}
],
"thresholds": [ ],
"timeFrom": null,
"timeShift": null,
"title": "Disk reads",
"tooltip": {
"shared": false,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [ ]
},
"yaxes": [
{
"format": "Bps",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
}
]
}
],
"repeat": null,
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
"title": "Disk",
"titleSize": "h6"
},
{
"collapse": false,
"height": "250px",
"panels": [
{
"aliasColors": { },
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 0,
"id": 8,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [ ],
"nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [ ],
"spaceLength": 10,
"span": 12,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "max by(persistentvolumeclaim) (\n kubelet_volume_stats_used_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"} /\n kubelet_volume_stats_capacity_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n)\nand\ncount by(persistentvolumeclaim) (\n kube_persistentvolumeclaim_labels{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n label_name=~\"(alertmanager).*\"\n }\n)\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{persistentvolumeclaim}}",
"legendLink": null
}
],
"thresholds": [ ],
"timeFrom": null,
"timeShift": null,
"title": "Disk space utilization",
"tooltip": {
"shared": false,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [ ]
},
"yaxes": [
{
"format": "percentunit",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
}
]
}
],
"repeat": null,
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
"title": "",
"titleSize": "h6"
}
],
"schemaVersion": 14,
"style": "dark",
"tags": [
"mimir"
],
"templating": {
"list": [
{
"current": {
"text": "default",
"value": "default"
},
"hide": 0,
"label": "Data Source",
"name": "datasource",
"options": [ ],
"query": "prometheus",
"refresh": 1,
"regex": "",
"type": "datasource"
},
{
"allValue": ".*",
"current": {
"text": "prod",
"value": "prod"
},
"datasource": "$datasource",
"hide": 0,
"includeAll": true,
"label": "cluster",
"multi": false,
"name": "cluster",
"options": [ ],
"query": "label_values(cortex_build_info, cluster)",
"refresh": 1,
"regex": "",
"sort": 1,
"tagValuesQuery": "",
"tags": [ ],
"tagsQuery": "",
"type": "query",
"useTags": false
},
{
"allValue": null,
"current": {
"text": "prod",
"value": "prod"
},
"datasource": "$datasource",
"hide": 0,
"includeAll": false,
"label": "namespace",
"multi": false,
"name": "namespace",
"options": [ ],
"query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)",
"refresh": 1,
"regex": "",
"sort": 1,
"tagValuesQuery": "",
"tags": [ ],
"tagsQuery": "",
"type": "query",
"useTags": false
}
]
},
"time": {
"from": "now-1h",
"to": "now"
},
"timepicker": {
"refresh_intervals": [
"5s",
"10s",
"30s",
"1m",
"5m",
"15m",
"30m",
"1h",
"2h",
"1d"
],
"time_options": [
"5m",
"15m",
"1h",
"6h",
"12h",
"24h",
"2d",
"7d",
"30d"
]
},
"timezone": "utc",
"title": "Mimir / Alertmanager resources",
"uid": "a6883fb22799ac74479c7db872451092",
"version": 0
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,940 @@
{
"__requires": [
{
"id": "grafana",
"name": "Grafana",
"type": "grafana",
"version": "8.0.0"
}
],
"annotations": {
"list": [ ]
},
"editable": true,
"gnetId": null,
"graphTooltip": 1,
"hideControls": false,
"links": [
{
"asDropdown": true,
"icon": "external link",
"includeVars": true,
"keepTime": true,
"tags": [
"mimir"
],
"targetBlank": false,
"title": "Mimir dashboards",
"type": "dashboards"
}
],
"refresh": "10s",
"rows": [
{
"collapse": false,
"height": "250px",
"panels": [
{
"aliasColors": { },
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 0,
"id": 1,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [ ],
"nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [
{
"alias": "request",
"color": "#FFC000",
"dashLength": 5,
"dashes": true,
"fill": 0
},
{
"alias": "limit",
"color": "#E02F44",
"dashLength": 5,
"dashes": true,
"fill": 0
}
],
"spaceLength": 10,
"span": 6,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\"}[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}}",
"legendLink": null
},
{
"expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\"})",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "limit",
"legendLink": null
},
{
"expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\",resource=\"cpu\"})",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "request",
"legendLink": null
}
],
"thresholds": [ ],
"timeFrom": null,
"timeShift": null,
"title": "CPU",
"tooltip": {
"sort": 2
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [ ]
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
}
]
},
{
"aliasColors": { },
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 0,
"id": 2,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [ ],
"nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [ ],
"spaceLength": 10,
"span": 6,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\"})",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}}",
"legendLink": null
}
],
"thresholds": [ ],
"timeFrom": null,
"timeShift": null,
"title": "Memory (go heap inuse)",
"tooltip": {
"sort": 2
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [ ]
},
"yaxes": [
{
"format": "bytes",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
}
]
}
],
"repeat": null,
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
"title": "CPU and memory",
"titleSize": "h6"
},
{
"collapse": false,
"height": "250px",
"panels": [
{
"aliasColors": { },
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 0,
"id": 3,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [ ],
"nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [
{
"alias": "request",
"color": "#FFC000",
"dashLength": 5,
"dashes": true,
"fill": 0
},
{
"alias": "limit",
"color": "#E02F44",
"dashLength": 5,
"dashes": true,
"fill": 0
}
],
"spaceLength": 10,
"span": 6,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "max by(pod) (container_memory_rss{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\"})",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}}",
"legendLink": null
},
{
"expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\"} > 0)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "limit",
"legendLink": null
},
{
"expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\",resource=\"memory\"})",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "request",
"legendLink": null
}
],
"thresholds": [ ],
"timeFrom": null,
"timeShift": null,
"title": "Memory (RSS)",
"tooltip": {
"sort": 2
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [ ]
},
"yaxes": [
{
"format": "bytes",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
}
]
},
{
"aliasColors": { },
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 0,
"id": 4,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [ ],
"nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [
{
"alias": "request",
"color": "#FFC000",
"dashLength": 5,
"dashes": true,
"fill": 0
},
{
"alias": "limit",
"color": "#E02F44",
"dashLength": 5,
"dashes": true,
"fill": 0
}
],
"spaceLength": 10,
"span": 6,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\"})",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}}",
"legendLink": null
},
{
"expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\"} > 0)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "limit",
"legendLink": null
},
{
"expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\",resource=\"memory\"})",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "request",
"legendLink": null
}
],
"thresholds": [ ],
"timeFrom": null,
"timeShift": null,
"title": "Memory (workingset)",
"tooltip": {
"sort": 2
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [ ]
},
"yaxes": [
{
"format": "bytes",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
}
]
}
],
"repeat": null,
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
"title": "",
"titleSize": "h6"
},
{
"collapse": false,
"height": "250px",
"panels": [
{
"aliasColors": { },
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 10,
"id": 5,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 0,
"links": [ ],
"nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [ ],
"spaceLength": 10,
"span": 6,
"stack": true,
"steppedLine": false,
"targets": [
{
"expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?compactor.*\"}[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}}",
"legendLink": null
}
],
"thresholds": [ ],
"timeFrom": null,
"timeShift": null,
"title": "Receive bandwidth",
"tooltip": {
"shared": false,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [ ]
},
"yaxes": [
{
"format": "Bps",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
}
]
},
{
"aliasColors": { },
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 10,
"id": 6,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 0,
"links": [ ],
"nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [ ],
"spaceLength": 10,
"span": 6,
"stack": true,
"steppedLine": false,
"targets": [
{
"expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?compactor.*\"}[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}}",
"legendLink": null
}
],
"thresholds": [ ],
"timeFrom": null,
"timeShift": null,
"title": "Transmit bandwidth",
"tooltip": {
"shared": false,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [ ]
},
"yaxes": [
{
"format": "Bps",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
}
]
}
],
"repeat": null,
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
"title": "Network",
"titleSize": "h6"
},
{
"collapse": false,
"height": "250px",
"panels": [
{
"aliasColors": { },
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 10,
"id": 7,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 0,
"links": [ ],
"nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [ ],
"spaceLength": 10,
"span": 4,
"stack": true,
"steppedLine": false,
"targets": [
{
"expr": "sum by(instance, pod, device) (\n rate(\n node_disk_written_bytes_total[$__rate_interval]\n )\n)\n+\nignoring(pod) group_right() (\n label_replace(\n count by(\n instance,\n pod,\n device\n )\n (\n container_fs_writes_bytes_total{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n container=~\"compactor\",\n device!~\".*sda.*\"\n }\n ),\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ) * 0\n)\n\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}} - {{device}}",
"legendLink": null
}
],
"thresholds": [ ],
"timeFrom": null,
"timeShift": null,
"title": "Disk writes",
"tooltip": {
"shared": false,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [ ]
},
"yaxes": [
{
"format": "Bps",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
}
]
},
{
"aliasColors": { },
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 10,
"id": 8,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 0,
"links": [ ],
"nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [ ],
"spaceLength": 10,
"span": 4,
"stack": true,
"steppedLine": false,
"targets": [
{
"expr": "sum by(instance, pod, device) (\n rate(\n node_disk_read_bytes_total[$__rate_interval]\n )\n) + ignoring(pod) group_right() (\n label_replace(\n count by(\n instance,\n pod,\n device\n )\n (\n container_fs_writes_bytes_total{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n container=~\"compactor\",\n device!~\".*sda.*\"\n }\n ),\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ) * 0\n)\n\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}} - {{device}}",
"legendLink": null
}
],
"thresholds": [ ],
"timeFrom": null,
"timeShift": null,
"title": "Disk reads",
"tooltip": {
"shared": false,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [ ]
},
"yaxes": [
{
"format": "Bps",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
}
]
},
{
"aliasColors": { },
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 0,
"id": 9,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [ ],
"nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [ ],
"spaceLength": 10,
"span": 4,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "max by(persistentvolumeclaim) (\n kubelet_volume_stats_used_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"} /\n kubelet_volume_stats_capacity_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n)\nand\ncount by(persistentvolumeclaim) (\n kube_persistentvolumeclaim_labels{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n label_name=~\"(compactor).*\"\n }\n)\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{persistentvolumeclaim}}",
"legendLink": null
}
],
"thresholds": [ ],
"timeFrom": null,
"timeShift": null,
"title": "Disk space utilization",
"tooltip": {
"shared": false,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [ ]
},
"yaxes": [
{
"format": "percentunit",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
}
]
}
],
"repeat": null,
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
"title": "Disk",
"titleSize": "h6"
}
],
"schemaVersion": 14,
"style": "dark",
"tags": [
"mimir"
],
"templating": {
"list": [
{
"current": {
"text": "default",
"value": "default"
},
"hide": 0,
"label": "Data Source",
"name": "datasource",
"options": [ ],
"query": "prometheus",
"refresh": 1,
"regex": "",
"type": "datasource"
},
{
"allValue": ".+",
"current": {
"selected": true,
"text": "All",
"value": "$__all"
},
"datasource": "$datasource",
"hide": 0,
"includeAll": true,
"label": "cluster",
"multi": true,
"name": "cluster",
"options": [ ],
"query": "label_values(cortex_build_info, cluster)",
"refresh": 1,
"regex": "",
"sort": 1,
"tagValuesQuery": "",
"tags": [ ],
"tagsQuery": "",
"type": "query",
"useTags": false
},
{
"allValue": ".+",
"current": {
"selected": true,
"text": "All",
"value": "$__all"
},
"datasource": "$datasource",
"hide": 0,
"includeAll": false,
"label": "namespace",
"multi": true,
"name": "namespace",
"options": [ ],
"query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)",
"refresh": 1,
"regex": "",
"sort": 1,
"tagValuesQuery": "",
"tags": [ ],
"tagsQuery": "",
"type": "query",
"useTags": false
}
]
},
"time": {
"from": "now-1h",
"to": "now"
},
"timepicker": {
"refresh_intervals": [
"5s",
"10s",
"30s",
"1m",
"5m",
"15m",
"30m",
"1h",
"2h",
"1d"
],
"time_options": [
"5m",
"15m",
"1h",
"6h",
"12h",
"24h",
"2d",
"7d",
"30d"
]
},
"timezone": "utc",
"title": "Mimir / Compactor resources",
"uid": "09a5c49e9cdb2f2b24c6d184574a07fd",
"version": 0
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,312 @@
{
"__requires": [
{
"id": "grafana",
"name": "Grafana",
"type": "grafana",
"version": "8.0.0"
}
],
"annotations": {
"list": [ ]
},
"editable": true,
"gnetId": null,
"graphTooltip": 1,
"hideControls": false,
"links": [
{
"asDropdown": true,
"icon": "external link",
"includeVars": true,
"keepTime": true,
"tags": [
"mimir"
],
"targetBlank": false,
"title": "Mimir dashboards",
"type": "dashboards"
}
],
"refresh": "10s",
"rows": [
{
"collapse": false,
"height": "250px",
"panels": [
{
"aliasColors": { },
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 10,
"id": 1,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 0,
"links": [ ],
"nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [ ],
"spaceLength": 10,
"span": 12,
"stack": true,
"steppedLine": false,
"targets": [
{
"expr": "count(cortex_config_hash{cluster=~\"$cluster\", namespace=~\"$namespace\"}) by (sha256)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "sha256:{{sha256}}",
"legendLink": null
}
],
"thresholds": [ ],
"timeFrom": null,
"timeShift": null,
"title": "Startup config file hashes",
"tooltip": {
"shared": false,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [ ]
},
"yaxes": [
{
"format": "instances",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
}
]
}
],
"repeat": null,
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
"title": "Startup config file",
"titleSize": "h6"
},
{
"collapse": false,
"height": "250px",
"panels": [
{
"aliasColors": { },
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 10,
"id": 2,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 0,
"links": [ ],
"nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [ ],
"spaceLength": 10,
"span": 12,
"stack": true,
"steppedLine": false,
"targets": [
{
"expr": "count(cortex_runtime_config_hash{cluster=~\"$cluster\", namespace=~\"$namespace\"}) by (sha256)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "sha256:{{sha256}}",
"legendLink": null
}
],
"thresholds": [ ],
"timeFrom": null,
"timeShift": null,
"title": "Runtime config file hashes",
"tooltip": {
"shared": false,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [ ]
},
"yaxes": [
{
"format": "instances",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
}
]
}
],
"repeat": null,
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
"title": "Runtime config file",
"titleSize": "h6"
}
],
"schemaVersion": 14,
"style": "dark",
"tags": [
"mimir"
],
"templating": {
"list": [
{
"current": {
"text": "default",
"value": "default"
},
"hide": 0,
"label": "Data Source",
"name": "datasource",
"options": [ ],
"query": "prometheus",
"refresh": 1,
"regex": "",
"type": "datasource"
},
{
"allValue": ".+",
"current": {
"selected": true,
"text": "All",
"value": "$__all"
},
"datasource": "$datasource",
"hide": 0,
"includeAll": true,
"label": "cluster",
"multi": true,
"name": "cluster",
"options": [ ],
"query": "label_values(cortex_build_info, cluster)",
"refresh": 1,
"regex": "",
"sort": 1,
"tagValuesQuery": "",
"tags": [ ],
"tagsQuery": "",
"type": "query",
"useTags": false
},
{
"allValue": ".+",
"current": {
"selected": true,
"text": "All",
"value": "$__all"
},
"datasource": "$datasource",
"hide": 0,
"includeAll": true,
"label": "namespace",
"multi": true,
"name": "namespace",
"options": [ ],
"query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)",
"refresh": 1,
"regex": "",
"sort": 1,
"tagValuesQuery": "",
"tags": [ ],
"tagsQuery": "",
"type": "query",
"useTags": false
}
]
},
"time": {
"from": "now-1h",
"to": "now"
},
"timepicker": {
"refresh_intervals": [
"5s",
"10s",
"30s",
"1m",
"5m",
"15m",
"30m",
"1h",
"2h",
"1d"
],
"time_options": [
"5m",
"15m",
"1h",
"6h",
"12h",
"24h",
"2d",
"7d",
"30d"
]
},
"timezone": "utc",
"title": "Mimir / Config",
"uid": "5d9d0b4724c0f80d68467088ec61e003",
"version": 0
}

View File

@ -0,0 +1,938 @@
{
"__requires": [
{
"id": "grafana",
"name": "Grafana",
"type": "grafana",
"version": "8.0.0"
}
],
"annotations": {
"list": [ ]
},
"editable": true,
"gnetId": null,
"graphTooltip": 1,
"hideControls": false,
"links": [
{
"asDropdown": true,
"icon": "external link",
"includeVars": true,
"keepTime": true,
"tags": [
"mimir"
],
"targetBlank": false,
"title": "Mimir dashboards",
"type": "dashboards"
}
],
"refresh": "10s",
"rows": [
{
"collapse": false,
"height": "250px",
"panels": [
{
"aliasColors": { },
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 10,
"id": 1,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 0,
"links": [ ],
"nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [ ],
"spaceLength": 10,
"span": 6,
"stack": true,
"steppedLine": false,
"targets": [
{
"expr": "sum by(component) (rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{component}}",
"legendLink": null
}
],
"thresholds": [ ],
"timeFrom": null,
"timeShift": null,
"title": "RPS / component",
"tooltip": {
"shared": false,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [ ]
},
"yaxes": [
{
"format": "reqps",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
}
]
},
{
"datasource": "$datasource",
"fieldConfig": {
"defaults": {
"max": 1,
"min": 0,
"noValue": "0",
"unit": "percentunit"
}
},
"id": 2,
"links": [ ],
"options": {
"legend": {
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"span": 6,
"targets": [
{
"expr": "sum by(component) (rate(thanos_objstore_bucket_operation_failures_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) / sum by(component) (rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) >= 0",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{component}}",
"legendLink": null
}
],
"title": "Error rate / component",
"type": "timeseries"
}
],
"repeat": null,
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
"title": "Components",
"titleSize": "h6"
},
{
"collapse": false,
"height": "250px",
"panels": [
{
"aliasColors": { },
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 10,
"id": 3,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 0,
"links": [ ],
"nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [ ],
"spaceLength": 10,
"span": 6,
"stack": true,
"steppedLine": false,
"targets": [
{
"expr": "sum by(operation) (rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{operation}}",
"legendLink": null
}
],
"thresholds": [ ],
"timeFrom": null,
"timeShift": null,
"title": "RPS / operation",
"tooltip": {
"shared": false,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [ ]
},
"yaxes": [
{
"format": "reqps",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
}
]
},
{
"datasource": "$datasource",
"fieldConfig": {
"defaults": {
"max": 1,
"min": 0,
"noValue": "0",
"unit": "percentunit"
}
},
"id": 4,
"links": [ ],
"options": {
"legend": {
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"span": 6,
"targets": [
{
"expr": "sum by(operation) (rate(thanos_objstore_bucket_operation_failures_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) / sum by(operation) (rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) >= 0",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{operation}}",
"legendLink": null
}
],
"title": "Error rate / operation",
"type": "timeseries"
}
],
"repeat": null,
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
"title": "Operations",
"titleSize": "h6"
},
{
"collapse": false,
"height": "250px",
"panels": [
{
"aliasColors": { },
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 1,
"id": 5,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [ ],
"nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [ ],
"spaceLength": 10,
"span": 4,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"get\"}[$__rate_interval])) by (le)) * 1e3",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "99th Percentile",
"refId": "A"
},
{
"expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"get\"}[$__rate_interval])) by (le)) * 1e3",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "50th Percentile",
"refId": "B"
},
{
"expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"get\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"get\"}[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "Average",
"refId": "C"
}
],
"thresholds": [ ],
"timeFrom": null,
"timeShift": null,
"title": "Op: Get",
"tooltip": {
"shared": false,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [ ]
},
"yaxes": [
{
"format": "ms",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
}
]
},
{
"aliasColors": { },
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 1,
"id": 6,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [ ],
"nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [ ],
"spaceLength": 10,
"span": 4,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"get_range\"}[$__rate_interval])) by (le)) * 1e3",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "99th Percentile",
"refId": "A"
},
{
"expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"get_range\"}[$__rate_interval])) by (le)) * 1e3",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "50th Percentile",
"refId": "B"
},
{
"expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"get_range\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"get_range\"}[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "Average",
"refId": "C"
}
],
"thresholds": [ ],
"timeFrom": null,
"timeShift": null,
"title": "Op: GetRange",
"tooltip": {
"shared": false,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [ ]
},
"yaxes": [
{
"format": "ms",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
}
]
},
{
"aliasColors": { },
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 1,
"id": 7,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [ ],
"nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [ ],
"spaceLength": 10,
"span": 4,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"exists\"}[$__rate_interval])) by (le)) * 1e3",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "99th Percentile",
"refId": "A"
},
{
"expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"exists\"}[$__rate_interval])) by (le)) * 1e3",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "50th Percentile",
"refId": "B"
},
{
"expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"exists\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"exists\"}[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "Average",
"refId": "C"
}
],
"thresholds": [ ],
"timeFrom": null,
"timeShift": null,
"title": "Op: Exists",
"tooltip": {
"shared": false,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [ ]
},
"yaxes": [
{
"format": "ms",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
}
]
}
],
"repeat": null,
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
"title": "",
"titleSize": "h6"
},
{
"collapse": false,
"height": "250px",
"panels": [
{
"aliasColors": { },
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 1,
"id": 8,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [ ],
"nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [ ],
"spaceLength": 10,
"span": 4,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"attributes\"}[$__rate_interval])) by (le)) * 1e3",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "99th Percentile",
"refId": "A"
},
{
"expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"attributes\"}[$__rate_interval])) by (le)) * 1e3",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "50th Percentile",
"refId": "B"
},
{
"expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"attributes\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"attributes\"}[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "Average",
"refId": "C"
}
],
"thresholds": [ ],
"timeFrom": null,
"timeShift": null,
"title": "Op: Attributes",
"tooltip": {
"shared": false,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [ ]
},
"yaxes": [
{
"format": "ms",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
}
]
},
{
"aliasColors": { },
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 1,
"id": 9,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [ ],
"nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [ ],
"spaceLength": 10,
"span": 4,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "99th Percentile",
"refId": "A"
},
{
"expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "50th Percentile",
"refId": "B"
},
{
"expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"upload\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"upload\"}[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "Average",
"refId": "C"
}
],
"thresholds": [ ],
"timeFrom": null,
"timeShift": null,
"title": "Op: Upload",
"tooltip": {
"shared": false,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [ ]
},
"yaxes": [
{
"format": "ms",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
}
]
},
{
"aliasColors": { },
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 1,
"id": 10,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [ ],
"nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [ ],
"spaceLength": 10,
"span": 4,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"delete\"}[$__rate_interval])) by (le)) * 1e3",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "99th Percentile",
"refId": "A"
},
{
"expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"delete\"}[$__rate_interval])) by (le)) * 1e3",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "50th Percentile",
"refId": "B"
},
{
"expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"delete\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"delete\"}[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "Average",
"refId": "C"
}
],
"thresholds": [ ],
"timeFrom": null,
"timeShift": null,
"title": "Op: Delete",
"tooltip": {
"shared": false,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [ ]
},
"yaxes": [
{
"format": "ms",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
}
]
}
],
"repeat": null,
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
"title": "",
"titleSize": "h6"
}
],
"schemaVersion": 14,
"style": "dark",
"tags": [
"mimir"
],
"templating": {
"list": [
{
"current": {
"text": "default",
"value": "default"
},
"hide": 0,
"label": "Data Source",
"name": "datasource",
"options": [ ],
"query": "prometheus",
"refresh": 1,
"regex": "",
"type": "datasource"
},
{
"allValue": ".+",
"current": {
"selected": true,
"text": "All",
"value": "$__all"
},
"datasource": "$datasource",
"hide": 0,
"includeAll": true,
"label": "cluster",
"multi": true,
"name": "cluster",
"options": [ ],
"query": "label_values(cortex_build_info, cluster)",
"refresh": 1,
"regex": "",
"sort": 1,
"tagValuesQuery": "",
"tags": [ ],
"tagsQuery": "",
"type": "query",
"useTags": false
},
{
"allValue": ".+",
"current": {
"selected": true,
"text": "All",
"value": "$__all"
},
"datasource": "$datasource",
"hide": 0,
"includeAll": true,
"label": "namespace",
"multi": true,
"name": "namespace",
"options": [ ],
"query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)",
"refresh": 1,
"regex": "",
"sort": 1,
"tagValuesQuery": "",
"tags": [ ],
"tagsQuery": "",
"type": "query",
"useTags": false
}
]
},
"time": {
"from": "now-1h",
"to": "now"
},
"timepicker": {
"refresh_intervals": [
"5s",
"10s",
"30s",
"1m",
"5m",
"15m",
"30m",
"1h",
"2h",
"1d"
],
"time_options": [
"5m",
"15m",
"1h",
"6h",
"12h",
"24h",
"2d",
"7d",
"30d"
]
},
"timezone": "utc",
"title": "Mimir / Object Store",
"uid": "e1324ee2a434f4158c00a9ee279d3292",
"version": 0
}

View File

@ -0,0 +1,266 @@
{
"__requires": [
{
"id": "grafana",
"name": "Grafana",
"type": "grafana",
"version": "8.0.0"
}
],
"annotations": {
"list": [ ]
},
"editable": true,
"gnetId": null,
"graphTooltip": 1,
"hideControls": false,
"links": [
{
"asDropdown": true,
"icon": "external link",
"includeVars": true,
"keepTime": true,
"tags": [
"mimir"
],
"targetBlank": false,
"title": "Mimir dashboards",
"type": "dashboards"
}
],
"refresh": "",
"rows": [
{
"collapse": false,
"height": "250px",
"panels": [
{
"datasource": "${datasource}",
"id": 1,
"span": 12,
"targets": [
{
"expr": "max by(limit_name) (cortex_limits_defaults{cluster=~\"$cluster\",namespace=~\"$namespace\"})",
"instant": true,
"legendFormat": "",
"refId": "A"
}
],
"title": "Defaults",
"transformations": [
{
"id": "labelsToFields",
"options": { }
},
{
"id": "merge",
"options": { }
},
{
"id": "organize",
"options": {
"excludeByName": {
"Time": true
},
"indexByName": {
"Value": 1,
"limit_name": 0
}
}
},
{
"id": "sortBy",
"options": {
"fields": { },
"sort": [
{
"field": "limit_name"
}
]
}
}
],
"type": "table"
}
],
"repeat": null,
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
"title": "",
"titleSize": "h6"
},
{
"collapse": false,
"height": "250px",
"panels": [
{
"datasource": "${datasource}",
"id": 2,
"span": 12,
"targets": [
{
"expr": "max by(user, limit_name) (cortex_limits_overrides{cluster=~\"$cluster\",namespace=~\"$namespace\",user=~\"${tenant_id}\"})",
"instant": true,
"legendFormat": "",
"refId": "A"
}
],
"title": "Per-tenant overrides",
"transformations": [
{
"id": "labelsToFields",
"options": {
"mode": "columns",
"valueLabel": "limit_name"
}
},
{
"id": "merge",
"options": { }
},
{
"id": "organize",
"options": {
"excludeByName": {
"Time": true
},
"indexByName": {
"user": 0
}
}
}
],
"type": "table"
}
],
"repeat": null,
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
"title": "",
"titleSize": "h6"
}
],
"schemaVersion": 14,
"style": "dark",
"tags": [
"mimir"
],
"templating": {
"list": [
{
"current": {
"text": "default",
"value": "default"
},
"hide": 0,
"label": "Data Source",
"name": "datasource",
"options": [ ],
"query": "prometheus",
"refresh": 1,
"regex": "",
"type": "datasource"
},
{
"allValue": ".*",
"current": {
"text": "prod",
"value": "prod"
},
"datasource": "$datasource",
"hide": 0,
"includeAll": true,
"label": "cluster",
"multi": false,
"name": "cluster",
"options": [ ],
"query": "label_values(cortex_build_info, cluster)",
"refresh": 1,
"regex": "",
"sort": 1,
"tagValuesQuery": "",
"tags": [ ],
"tagsQuery": "",
"type": "query",
"useTags": false
},
{
"allValue": null,
"current": {
"text": "prod",
"value": "prod"
},
"datasource": "$datasource",
"hide": 0,
"includeAll": false,
"label": "namespace",
"multi": false,
"name": "namespace",
"options": [ ],
"query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)",
"refresh": 1,
"regex": "",
"sort": 1,
"tagValuesQuery": "",
"tags": [ ],
"tagsQuery": "",
"type": "query",
"useTags": false
},
{
"current": {
"selected": true,
"text": ".*",
"value": ".*"
},
"hide": 0,
"label": "Tenant ID",
"name": "tenant_id",
"options": [
{
"selected": true,
"text": ".*",
"value": ".*"
}
],
"query": ".*",
"type": "textbox"
}
]
},
"time": {
"from": "now-1h",
"to": "now"
},
"timepicker": {
"refresh_intervals": [
"5s",
"10s",
"30s",
"1m",
"5m",
"15m",
"30m",
"1h",
"2h",
"1d"
],
"time_options": [
"5m",
"15m",
"1h",
"6h",
"12h",
"24h",
"2d",
"7d",
"30d"
]
},
"timezone": "utc",
"title": "Mimir / Overrides",
"uid": "1e2c358600ac53f09faea133f811b5bb",
"version": 0
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,362 @@
{
"__requires": [
{
"id": "grafana",
"name": "Grafana",
"type": "grafana",
"version": "8.0.0"
}
],
"annotations": {
"list": [ ]
},
"editable": true,
"gnetId": null,
"graphTooltip": 1,
"hideControls": false,
"links": [
{
"asDropdown": true,
"icon": "external link",
"includeVars": true,
"keepTime": true,
"tags": [
"mimir"
],
"targetBlank": false,
"title": "Mimir dashboards",
"type": "dashboards"
}
],
"refresh": "10s",
"rows": [
{
"collapse": false,
"height": "200px",
"panels": [
{
"id": 1,
"options": {
"content": "This dashboard identifies scaling-related issues by suggesting services that you might want to scale up.\nThe table that follows contains a suggested number of replicas and the reason why.\nIf the system is failing and depending on the reason, try scaling up to the specified number.\nThe specified numbers are intended as helpful guidelines when things go wrong, rather than prescriptive guidelines.\n\nReasons:\n- **sample_rate**: There are not enough replicas to handle the\n sample rate. Applies to distributor and ingesters.\n- **active_series**: There are not enough replicas\n to handle the number of active series. Applies to ingesters.\n- **cpu_usage**: There are not enough replicas\n based on the CPU usage of the jobs vs the resource requests.\n Applies to all jobs.\n- **memory_usage**: There are not enough replicas based on the memory\n usage vs the resource requests. Applies to all jobs.\n- **active_series_limits**: There are not enough replicas to hold 60% of the\n sum of all the per tenant series limits.\n- **sample_rate_limits**: There are not enough replicas to handle 60% of the\n sum of all the per tenant rate limits.\n",
"mode": "markdown"
},
"span": 12,
"title": "",
"type": "text"
}
],
"repeat": null,
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
"title": "Service scaling",
"titleSize": "h6"
},
{
"collapse": false,
"height": "400px",
"panels": [
{
"aliasColors": { },
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 1,
"id": 2,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [ ],
"nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [ ],
"sort": {
"col": 0,
"desc": false
},
"spaceLength": 10,
"span": 12,
"stack": false,
"steppedLine": false,
"styles": [
{
"alias": "Time",
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"pattern": "Time",
"type": "hidden"
},
{
"alias": "Required Replicas",
"colorMode": null,
"colors": [ ],
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 0,
"link": false,
"linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value",
"thresholds": [ ],
"type": "number",
"unit": "short"
},
{
"alias": "Cluster",
"colorMode": null,
"colors": [ ],
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
"linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "__name__",
"thresholds": [ ],
"type": "hidden",
"unit": "short"
},
{
"alias": "Cluster",
"colorMode": null,
"colors": [ ],
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
"linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "cluster",
"thresholds": [ ],
"type": "number",
"unit": "short"
},
{
"alias": "Service",
"colorMode": null,
"colors": [ ],
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
"linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "deployment",
"thresholds": [ ],
"type": "number",
"unit": "short"
},
{
"alias": "Namespace",
"colorMode": null,
"colors": [ ],
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
"linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "namespace",
"thresholds": [ ],
"type": "number",
"unit": "short"
},
{
"alias": "Reason",
"colorMode": null,
"colors": [ ],
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
"linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "reason",
"thresholds": [ ],
"type": "number",
"unit": "short"
},
{
"alias": "",
"colorMode": null,
"colors": [ ],
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"pattern": "/.*/",
"thresholds": [ ],
"type": "string",
"unit": "short"
}
],
"targets": [
{
"expr": "sort_desc(\n cluster_namespace_deployment_reason:required_replicas:count{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n > ignoring(reason) group_left\n cluster_namespace_deployment:actual_replicas:count{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n)\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "A"
}
],
"thresholds": [ ],
"timeFrom": null,
"timeShift": null,
"title": "Workload-based scaling",
"tooltip": {
"shared": false,
"sort": 0,
"value_type": "individual"
},
"transform": "table",
"type": "table",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [ ]
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
}
]
}
],
"repeat": null,
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
"title": "Scaling",
"titleSize": "h6"
}
],
"schemaVersion": 14,
"style": "dark",
"tags": [
"mimir"
],
"templating": {
"list": [
{
"current": {
"text": "default",
"value": "default"
},
"hide": 0,
"label": "Data Source",
"name": "datasource",
"options": [ ],
"query": "prometheus",
"refresh": 1,
"regex": "",
"type": "datasource"
},
{
"allValue": ".+",
"current": {
"selected": true,
"text": "All",
"value": "$__all"
},
"datasource": "$datasource",
"hide": 0,
"includeAll": true,
"label": "cluster",
"multi": true,
"name": "cluster",
"options": [ ],
"query": "label_values(cortex_build_info, cluster)",
"refresh": 1,
"regex": "",
"sort": 1,
"tagValuesQuery": "",
"tags": [ ],
"tagsQuery": "",
"type": "query",
"useTags": false
},
{
"allValue": ".+",
"current": {
"selected": true,
"text": "All",
"value": "$__all"
},
"datasource": "$datasource",
"hide": 0,
"includeAll": true,
"label": "namespace",
"multi": true,
"name": "namespace",
"options": [ ],
"query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)",
"refresh": 1,
"regex": "",
"sort": 1,
"tagValuesQuery": "",
"tags": [ ],
"tagsQuery": "",
"type": "query",
"useTags": false
}
]
},
"time": {
"from": "now-1h",
"to": "now"
},
"timepicker": {
"refresh_intervals": [
"5s",
"10s",
"30s",
"1m",
"5m",
"15m",
"30m",
"1h",
"2h",
"1d"
],
"time_options": [
"5m",
"15m",
"1h",
"6h",
"12h",
"24h",
"2d",
"7d",
"30d"
]
},
"timezone": "utc",
"title": "Mimir / Scaling",
"uid": "64bbad83507b7289b514725658e10352",
"version": 0
}

View File

@ -0,0 +1,323 @@
{
"__requires": [
{
"id": "grafana",
"name": "Grafana",
"type": "grafana",
"version": "8.0.0"
}
],
"annotations": {
"list": [ ]
},
"editable": true,
"gnetId": null,
"graphTooltip": 1,
"hideControls": false,
"links": [
{
"asDropdown": true,
"icon": "external link",
"includeVars": true,
"keepTime": true,
"tags": [
"mimir"
],
"targetBlank": false,
"title": "Mimir dashboards",
"type": "dashboards"
}
],
"refresh": "",
"rows": [
{
"collapse": false,
"height": "250px",
"panels": [
{
"datasource": "${lokidatasource}",
"fieldConfig": {
"overrides": [
{
"matcher": {
"id": "byName",
"options": "Time range"
},
"properties": [
{
"id": "mappings",
"value": [
{
"from": "",
"id": 1,
"text": "Instant query",
"to": "",
"type": 1,
"value": "0"
}
]
},
{
"id": "unit",
"value": "s"
}
]
},
{
"matcher": {
"id": "byName",
"options": "Step"
},
"properties": [
{
"id": "unit",
"value": "s"
}
]
}
]
},
"id": 1,
"span": 12,
"targets": [
{
"expr": "{cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | response_time > ${min_duration}",
"instant": false,
"legendFormat": "",
"range": true,
"refId": "A"
}
],
"title": "Slow queries",
"transformations": [
{
"id": "extractFields",
"options": {
"source": "labels"
}
},
{
"id": "calculateField",
"options": {
"alias": "Time range",
"binary": {
"left": "param_end",
"operator": "-",
"reducer": "sum",
"right": "param_start"
},
"mode": "binary",
"reduce": {
"reducer": "sum"
},
"replaceFields": false
}
},
{
"id": "organize",
"options": {
"excludeByName": {
"Line": true,
"Time": true,
"caller": true,
"cluster": true,
"container": true,
"host": true,
"id": true,
"job": true,
"labels": true,
"level": true,
"line": true,
"method": true,
"msg": true,
"name": true,
"namespace": true,
"param_end": true,
"param_start": true,
"param_time": true,
"path": true,
"pod": true,
"pod_template_hash": true,
"query_wall_time_seconds": true,
"stream": true,
"traceID": true,
"tsNs": true
},
"indexByName": {
"Time range": 3,
"param_query": 2,
"param_step": 4,
"response_time": 5,
"ts": 0,
"user": 1
},
"renameByName": {
"org_id": "Tenant ID",
"param_query": "Query",
"param_step": "Step",
"response_time": "Duration"
}
}
}
],
"type": "table"
}
],
"repeat": null,
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
"title": "",
"titleSize": "h6"
}
],
"schemaVersion": 14,
"style": "dark",
"tags": [
"mimir"
],
"templating": {
"list": [
{
"current": {
"text": "default",
"value": "default"
},
"hide": 0,
"label": "Data Source",
"name": "datasource",
"options": [ ],
"query": "prometheus",
"refresh": 1,
"regex": "",
"type": "datasource"
},
{
"allValue": ".*",
"current": {
"text": "prod",
"value": "prod"
},
"datasource": "$datasource",
"hide": 0,
"includeAll": true,
"label": "cluster",
"multi": false,
"name": "cluster",
"options": [ ],
"query": "label_values(cortex_build_info, cluster)",
"refresh": 1,
"regex": "",
"sort": 1,
"tagValuesQuery": "",
"tags": [ ],
"tagsQuery": "",
"type": "query",
"useTags": false
},
{
"allValue": null,
"current": {
"text": "prod",
"value": "prod"
},
"datasource": "$datasource",
"hide": 0,
"includeAll": false,
"label": "namespace",
"multi": false,
"name": "namespace",
"options": [ ],
"query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)",
"refresh": 1,
"regex": "",
"sort": 1,
"tagValuesQuery": "",
"tags": [ ],
"tagsQuery": "",
"type": "query",
"useTags": false
},
{
"hide": 0,
"includeAll": false,
"label": "Logs datasource",
"multi": false,
"name": "lokidatasource",
"query": "loki",
"type": "datasource"
},
{
"current": {
"selected": true,
"text": "5s",
"value": "5s"
},
"hide": 0,
"label": "Min duration",
"name": "min_duration",
"options": [
{
"selected": true,
"text": "5s",
"value": "5s"
}
],
"query": "5s",
"type": "textbox"
},
{
"current": {
"selected": true,
"text": ".*",
"value": ".*"
},
"hide": 0,
"label": "Tenant ID",
"name": "tenant_id",
"options": [
{
"selected": true,
"text": ".*",
"value": ".*"
}
],
"query": ".*",
"type": "textbox"
}
]
},
"time": {
"from": "now-1h",
"to": "now"
},
"timepicker": {
"refresh_intervals": [
"5s",
"10s",
"30s",
"1m",
"5m",
"15m",
"30m",
"1h",
"2h",
"1d"
],
"time_options": [
"5m",
"15m",
"1h",
"6h",
"12h",
"24h",
"2d",
"7d",
"30d"
]
},
"timezone": "utc",
"title": "Mimir / Slow queries",
"uid": "6089e1ce1e678788f46312a0a1e647e6",
"version": 0
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -1,53 +0,0 @@
groups:
- name: "loki_rules"
rules:
- expr: "histogram_quantile(0.99, sum(rate(loki_request_duration_seconds_bucket[5m]))
by (le, cluster, job))"
record: "cluster_job:loki_request_duration_seconds:99quantile"
- expr: "histogram_quantile(0.50, sum(rate(loki_request_duration_seconds_bucket[5m]))
by (le, cluster, job))"
record: "cluster_job:loki_request_duration_seconds:50quantile"
- expr: "sum(rate(loki_request_duration_seconds_sum[5m])) by (cluster, job) / sum(rate(loki_request_duration_seconds_count[5m]))
by (cluster, job)"
record: "cluster_job:loki_request_duration_seconds:avg"
- expr: "sum(rate(loki_request_duration_seconds_bucket[5m])) by (le, cluster, job)"
record: "cluster_job:loki_request_duration_seconds_bucket:sum_rate"
- expr: "sum(rate(loki_request_duration_seconds_sum[5m])) by (cluster, job)"
record: "cluster_job:loki_request_duration_seconds_sum:sum_rate"
- expr: "sum(rate(loki_request_duration_seconds_count[5m])) by (cluster, job)"
record: "cluster_job:loki_request_duration_seconds_count:sum_rate"
- expr: "histogram_quantile(0.99, sum(rate(loki_request_duration_seconds_bucket[5m]))
by (le, cluster, job, route))"
record: "cluster_job_route:loki_request_duration_seconds:99quantile"
- expr: "histogram_quantile(0.50, sum(rate(loki_request_duration_seconds_bucket[5m]))
by (le, cluster, job, route))"
record: "cluster_job_route:loki_request_duration_seconds:50quantile"
- expr: "sum(rate(loki_request_duration_seconds_sum[5m])) by (cluster, job, route)
/ sum(rate(loki_request_duration_seconds_count[5m])) by (cluster, job, route)"
record: "cluster_job_route:loki_request_duration_seconds:avg"
- expr: "sum(rate(loki_request_duration_seconds_bucket[5m])) by (le, cluster, job,
route)"
record: "cluster_job_route:loki_request_duration_seconds_bucket:sum_rate"
- expr: "sum(rate(loki_request_duration_seconds_sum[5m])) by (cluster, job, route)"
record: "cluster_job_route:loki_request_duration_seconds_sum:sum_rate"
- expr: "sum(rate(loki_request_duration_seconds_count[5m])) by (cluster, job, route)"
record: "cluster_job_route:loki_request_duration_seconds_count:sum_rate"
- expr: "histogram_quantile(0.99, sum(rate(loki_request_duration_seconds_bucket[5m]))
by (le, cluster, namespace, job, route))"
record: "cluster_namespace_job_route:loki_request_duration_seconds:99quantile"
- expr: "histogram_quantile(0.50, sum(rate(loki_request_duration_seconds_bucket[5m]))
by (le, cluster, namespace, job, route))"
record: "cluster_namespace_job_route:loki_request_duration_seconds:50quantile"
- expr: "sum(rate(loki_request_duration_seconds_sum[5m])) by (cluster, namespace,
job, route) / sum(rate(loki_request_duration_seconds_count[5m])) by (cluster,
namespace, job, route)"
record: "cluster_namespace_job_route:loki_request_duration_seconds:avg"
- expr: "sum(rate(loki_request_duration_seconds_bucket[5m])) by (le, cluster, namespace,
job, route)"
record: "cluster_namespace_job_route:loki_request_duration_seconds_bucket:sum_rate"
- expr: "sum(rate(loki_request_duration_seconds_sum[5m])) by (cluster, namespace,
job, route)"
record: "cluster_namespace_job_route:loki_request_duration_seconds_sum:sum_rate"
- expr: "sum(rate(loki_request_duration_seconds_count[5m])) by (cluster, namespace,
job, route)"
record: "cluster_namespace_job_route:loki_request_duration_seconds_count:sum_rate"

View File

@ -1,33 +0,0 @@
{{/*
Return the appropriate apiVersion for ingress.
*/}}
{{- define "ingress.apiVersion" -}}
{{- if and (.Capabilities.APIVersions.Has "networking.k8s.io/v1") (semverCompare ">= 1.19-0" .Capabilities.KubeVersion.Version) -}}
{{- print "networking.k8s.io/v1" -}}
{{- else if .Capabilities.APIVersions.Has "networking.k8s.io/v1beta1" -}}
{{- print "networking.k8s.io/v1beta1" -}}
{{- else -}}
{{- print "extensions/v1beta1" -}}
{{- end -}}
{{- end -}}
{{/*
Return if ingress is stable.
*/}}
{{- define "ingress.isStable" -}}
{{- eq (include "ingress.apiVersion" .) "networking.k8s.io/v1" -}}
{{- end -}}
{{/*
Return if ingress supports ingressClassName.
*/}}
{{- define "ingress.supportsIngressClassName" -}}
{{- or (eq (include "ingress.isStable" .) "true") (and (eq (include "ingress.apiVersion" .) "networking.k8s.io/v1beta1") (semverCompare ">= 1.18-0" .Capabilities.KubeVersion.Version)) -}}
{{- end -}}
{{/*
Return if ingress supports pathType.
*/}}
{{- define "ingress.supportsPathType" -}}
{{- or (eq (include "ingress.isStable" .) "true") (and (eq (include "ingress.apiVersion" .) "networking.k8s.io/v1beta1") (semverCompare ">= 1.18-0" .Capabilities.KubeVersion.Version)) -}}
{{- end -}}

View File

@ -6,24 +6,6 @@
{{- join ", " $list }} {{- join ", " $list }}
{{- end }} {{- end }}
{{- define "agent.all_namespaces" -}}
{{- $list := list }}
{{- range .Values.namespacesToMonitor }}
{{- $list = append $list (printf "\"%s\"" .) }}
{{- end }}
{{- $list = append $list (printf "\"%s\"" .Release.Namespace) }}
{{- join ", " $list }}
{{- end }}
{{- define "agent.all_namespaces_bar" -}}
{{- $list := list }}
{{- range .Values.namespacesToMonitor }}
{{- $list = append $list (printf "%s" .) }}
{{- end }}
{{- $list = append $list .Release.Namespace }}
{{- join "|" $list }}
{{- end }}
{{- define "agent.loki_write_targets" -}} {{- define "agent.loki_write_targets" -}}
{{- $list := list }} {{- $list := list }}
{{- if .Values.local.logs.enabled }} {{- if .Values.local.logs.enabled }}
@ -36,10 +18,10 @@
{{- end }} {{- end }}
{{- define "agent.loki_process_targets" -}} {{- define "agent.loki_process_targets" -}}
{{- if and (empty .Values.logs.piiRegexes) (empty .Values.logs.retain) }} {{- if empty .Values.logs.piiRegexes }}
{{- include "agent.loki_write_targets" . }} {{- include "agent.loki_write_targets" . }}
{{- else }} {{- else }}
{{- printf "loki.process.filter.receiver" }} {{- printf "loki.process.PII.receiver" }}
{{- end }} {{- end }}
{{- end }} {{- end }}
@ -57,32 +39,10 @@
{{- define "agent.tempo_write_targets" -}} {{- define "agent.tempo_write_targets" -}}
{{- $list := list }} {{- $list := list }}
{{- if .Values.local.traces.enabled }} {{- if .Values.local.traces.enabled }}
{{- $list = append $list ("otelcol.exporter.otlphttp.local.input") }} {{- $list = append $list ("otelcol.exporter.otlp.local.input") }}
{{- end }} {{- end }}
{{- if .Values.cloud.traces.enabled }} {{- if .Values.cloud.traces.enabled }}
{{- $list = append $list ("otelcol.exporter.otlphttp.cloud.input") }} {{- $list = append $list ("otelcol.exporter.otlp.cloud.input") }}
{{- end }} {{- end }}
{{- join ", " $list }} {{- join ", " $list }}
{{- end }} {{- end }}
{{- define "agent.all_logs" -}}
{{- $list := list }}
{{- range .Values.logs.retain }}
{{- $list = append $list . }}
{{- end }}
{{- range .Values.logs.extraLogs }}
{{- $list = append $list . }}
{{- end }}
{{- join "|" $list }}
{{- end }}
{{- define "agent.all_metrics" -}}
{{- $list := list }}
{{- range .Values.metrics.retain }}
{{- $list = append $list . }}
{{- end }}
{{- range .Values.metrics.extraMetrics }}
{{- $list = append $list . }}
{{- end }}
{{- join "|" $list }}
{{- end }}

View File

@ -8,7 +8,7 @@ data:
discovery.kubernetes "pods" { discovery.kubernetes "pods" {
role = "pod" role = "pod"
namespaces { namespaces {
own_namespace = true own_namespace = false
names = [ {{ include "agent.namespaces" . }} ] names = [ {{ include "agent.namespaces" . }} ]
} }
} }
@ -33,40 +33,20 @@ data:
} }
rule { rule {
target_label = "cluster" target_label = "cluster"
replacement = "{{- .Values.clusterLabelValue -}}" replacement = "{{- .Values.clusterName -}}"
} }
} }
{{- if or .Values.local.logs.enabled .Values.cloud.logs.enabled }} {{- if or .Values.local.logs.enabled .Values.cloud.logs.enabled }}
// Logs
{{- if .Values.cloud.logs.enabled }}
remote.kubernetes.secret "logs_credentials" {
namespace = "{{- $.Release.Namespace -}}"
name = "{{- .Values.cloud.logs.secret -}}"
}
{{- end }}
loki.source.kubernetes "pods" { loki.source.kubernetes "pods" {
clustering {
enabled = true
}
targets = discovery.relabel.rename_meta_labels.output targets = discovery.relabel.rename_meta_labels.output
forward_to = [ {{ include "agent.loki_process_targets" . }} ] forward_to = [ {{ include "agent.loki_process_targets" . }} ]
} }
{{- if or (not (empty .Values.logs.retain)) (not (empty .Values.logs.piiRegexes)) }} {{- if not (empty .Values.logs.piiRegexes) }}
loki.process "filter" { loki.process "PII" {
forward_to = [ {{ include "agent.loki_write_targets" . }} ] forward_to = [ {{ include "agent.loki_write_targets" . }} ]
{{- if or (not (empty .Values.logs.retain)) (not (empty .Values.logs.extraLogs)) }}
stage.match {
selector = "{cluster=\"{{- .Values.clusterLabelValue -}}\", namespace=~\"{{- join "|" .Values.namespacesToMonitor -}}|{{- $.Release.Namespace -}}\", pod=~\"loki.*\"} !~ \"{{ include "agent.all_logs" . }}\""
action = "drop"
}
{{- end }}
{{- if not (empty .Values.logs.piiRegexes) }}
{{- range .Values.logs.piiRegexes }} {{- range .Values.logs.piiRegexes }}
stage.replace { stage.replace {
expression = "{{ .expression }}" expression = "{{ .expression }}"
@ -74,106 +54,33 @@ data:
replace = "{{ .replace }}" replace = "{{ .replace }}"
} }
{{- end }} {{- end }}
{{- end }}
} }
{{- end }} {{- end }}
{{- end }} {{- end }}
{{- if or .Values.local.metrics.enabled .Values.cloud.metrics.enabled }} {{- if or .Values.local.metrics.enabled .Values.cloud.metrics.enabled }}
// Metrics
{{- if .Values.cloud.metrics.enabled }}
remote.kubernetes.secret "metrics_credentials" {
namespace = "{{- $.Release.Namespace -}}"
name = "{{- .Values.cloud.metrics.secret -}}"
}
{{- end }}
discovery.kubernetes "metric_pods" {
role = "pod"
namespaces {
own_namespace = true
names = [ {{ include "agent.all_namespaces" . }} ]
}
}
discovery.relabel "only_http_metrics" {
targets = discovery.kubernetes.metric_pods.targets
rule {
source_labels = ["__meta_kubernetes_namespace"]
target_label = "namespace"
}
rule {
source_labels = ["__meta_kubernetes_pod_name"]
target_label = "pod"
}
rule {
source_labels = ["__meta_kubernetes_namespace", "__meta_kubernetes_pod_label_app_kubernetes_io_name", "__meta_kubernetes_pod_label_app_kubernetes_io_component"]
separator = "/"
regex = "(.*)/(.*)/(.*)"
replacement = "${1}/${2}-${3}"
target_label = "job"
}
rule {
target_label = "cluster"
replacement = "{{- .Values.clusterLabelValue -}}"
}
rule {
source_labels = ["__meta_kubernetes_pod_container_port_name"]
action = "keep"
regex = ".*metrics.*"
}
}
prometheus.scrape "pods" { prometheus.scrape "pods" {
clustering { targets = discovery.relabel.rename_meta_labels.output
enabled = true
}
targets = discovery.relabel.only_http_metrics.output
forward_to = [ prometheus.relabel.filter.receiver ]
}
prometheus.relabel "filter" {
rule {
target_label = "cluster"
replacement = "{{- .Values.clusterLabelValue -}}"
}
rule {
source_labels = ["__name__"]
regex = "({{ include "agent.all_metrics" . }})"
action = "keep"
}
rule {
source_labels = ["namespace"]
regex = "{{ include "agent.all_namespaces_bar" . }}"
action = "keep"
}
forward_to = [ {{ include "agent.prometheus_write_targets" . }} ] forward_to = [ {{ include "agent.prometheus_write_targets" . }} ]
} }
{{- if .Values.kubeStateMetrics.enabled }}
prometheus.scrape "kubeStateMetrics" {
targets = [ { "__address__" = "{{ .Values.kubeStateMetrics.endpoint }}" } ]
forward_to = [ {{ include "agent.prometheus_write_targets" . }} ]
}
{{- end }}
{{- end }} {{- end }}
{{- if or .Values.local.traces.enabled .Values.cloud.traces.enabled }} {{- if or .Values.local.traces.enabled .Values.cloud.traces.enabled }}
// Traces
{{- if .Values.cloud.traces.enabled }}
remote.kubernetes.secret "traces_credentials" {
namespace = "{{- $.Release.Namespace -}}"
name = "{{- .Values.cloud.traces.secret -}}"
}
{{- end }}
// Shamelessly copied from https://github.com/grafana/intro-to-mlt/blob/main/agent/config.river // Shamelessly copied from https://github.com/grafana/intro-to-mlt/blob/main/agent/config.river
otelcol.receiver.otlp "otlp_receiver" { otelcol.receiver.otlp "otlp_receiver" {
// We don't technically need this, but it shows how to change listen address and incoming port. // We don't technically need this, but it shows how to change listen address and incoming port.
// In this case, the Agent is listening on all available bindable addresses on port 4317 (which is the // In this case, the Agent is listening on all available bindable addresses on port 4317 (which is the
// default OTLP gRPC port) for the OTLP protocol. // default OTLP gRPC port) for the OTLP protocol.
grpc {} grpc {
endpoint = "0.0.0.0:4317"
}
// We define where to send the output of all ingested traces. In this case, to the OpenTelemetry batch processor // We define where to send the output of all ingested traces. In this case, to the OpenTelemetry batch processor
// named 'default'. // named 'default'.
@ -182,16 +89,6 @@ data:
} }
} }
otelcol.receiver.jaeger "jaeger" {
protocols {
thrift_http {}
}
output {
traces = [otelcol.processor.batch.default.input]
}
}
// The OpenTelemetry batch processor collects trace spans until a batch size or timeout is met, before sending those // The OpenTelemetry batch processor collects trace spans until a batch size or timeout is met, before sending those
// spans onto another target. This processor is labeled 'default'. // spans onto another target. This processor is labeled 'default'.
otelcol.processor.batch "default" { otelcol.processor.batch "default" {
@ -209,7 +106,7 @@ data:
{{- if .Values.local.logs.enabled }} {{- if .Values.local.logs.enabled }}
loki.write "local" { loki.write "local" {
endpoint { endpoint {
url = "http://loki-write.{{- .Release.Namespace -}}.svc.cluster.local:3100/loki/api/v1/push" url = "http://loki-gateway.{{- .Release.Namespace -}}.svc.cluster.local:80/loki/api/v1/push"
} }
} }
{{- end }} {{- end }}
@ -222,10 +119,21 @@ data:
} }
{{- end }} {{- end }}
{{- if .Values.local.traces.enabled }} {{- if or .Values.local.traces.enabled .Values.cloud.traces.enabled }}
otelcol.exporter.otlphttp "local" { // The OpenTelemetry exporter exports processed trace spans to another target that is listening for OTLP format traces.
// A unique label, 'local', is added to uniquely identify this exporter.
otelcol.exporter.otlp "local" {
// Define the client for exporting.
client { client {
endpoint = "http://{{- .Release.Name -}}-tempo-distributor.{{- .Release.Namespace -}}.svc:4318" // Send to the locally running Tempo instance, on port 4317 (OTLP gRPC).
endpoint = "meta-tempo-distributor:4317"
// Configure TLS settings for communicating with the endpoint.
tls {
// The connection is insecure.
insecure = true
// Do not verify TLS certificates when connecting.
insecure_skip_verify = true
}
} }
} }
{{- end }} {{- end }}
@ -233,10 +141,11 @@ data:
{{- if .Values.cloud.logs.enabled }} {{- if .Values.cloud.logs.enabled }}
loki.write "cloud" { loki.write "cloud" {
endpoint { endpoint {
url = nonsensitive(remote.kubernetes.secret.logs_credentials.data["endpoint"]) url = "{{- .Values.cloud.logs.endpoint -}}/loki/api/v1/push"
basic_auth { basic_auth {
username = nonsensitive(remote.kubernetes.secret.logs_credentials.data["username"]) username = "{{- .Values.cloud.logs.username -}}"
password = remote.kubernetes.secret.logs_credentials.data["password"] password = "{{- .Values.cloud.logs.password -}}"
} }
} }
} }
@ -245,25 +154,26 @@ data:
{{- if .Values.cloud.metrics.enabled }} {{- if .Values.cloud.metrics.enabled }}
prometheus.remote_write "cloud" { prometheus.remote_write "cloud" {
endpoint { endpoint {
url = nonsensitive(remote.kubernetes.secret.metrics_credentials.data["endpoint"]) url = "{{- .Values.cloud.metrics.endpoint -}}/api/prom/push"
basic_auth { basic_auth {
username = nonsensitive(remote.kubernetes.secret.metrics_credentials.data["username"]) username = "{{- .Values.cloud.metrics.username -}}"
password = remote.kubernetes.secret.metrics_credentials.data["password"] password = "{{- .Values.cloud.metrics.password -}}"
} }
} }
} }
{{- end }} {{- end }}
{{- if .Values.cloud.traces.enabled }} {{- if .Values.cloud.traces.enabled }}
otelcol.exporter.otlphttp "cloud" { otelcol.exporter.otlp "cloud" {
client { client {
endpoint = nonsensitive(remote.kubernetes.secret.traces_credentials.data["endpoint"]) endpoint = "{{- .Values.cloud.traces.endpoint -}}"
auth = otelcol.auth.basic.creds.handler auth = otelcol.auth.basic.creds.handler
} }
} }
otelcol.auth.basic "creds" { otelcol.auth.basic "creds" {
username = nonsensitive(remote.kubernetes.secret.traces_credentials.data["username"]) username = "{{- .Values.cloud.traces.username -}}"
password = remote.kubernetes.secret.traces_credentials.data["password"] password = "{{- .Values.cloud.traces.password -}}"
} }
{{- end }} {{- end }}

View File

@ -1,21 +0,0 @@
{{- if and .Values.local.grafana.enabled .Values.dashboards.logs.enabled }}
---
apiVersion: v1
kind: ConfigMap
metadata:
name: alloy-dashboards-1
namespace: {{ $.Release.Namespace }}
data:
"alloy-cluster-node.json": |
{{ $.Files.Get "src/dashboards/alloy-cluster-node.json" | fromJson | toJson }}
"alloy-cluster-overview.json": |
{{ $.Files.Get "src/dashboards/alloy-cluster-overview.json" | fromJson | toJson }}
"alloy-controller.json": |
{{ $.Files.Get "src/dashboards/alloy-controller.json" | fromJson | toJson }}
"alloy-opentelemetry.json": |
{{ $.Files.Get "src/dashboards/alloy-opentelemetry.json" | fromJson | toJson }}
"alloy-prometheus.json": |
{{ $.Files.Get "src/dashboards/alloy-prometheus.json" | fromJson | toJson }}
"alloy-resources.json": |
{{ $.Files.Get "src/dashboards/alloy-resources.json" | fromJson | toJson }}
{{- end }}

View File

@ -1,4 +1,4 @@
{{- if and .Values.local.grafana.enabled .Values.dashboards.logs.enabled }} {{- if or (or .Values.dashboards.logs.enabled .Values.dashboards.metrics.enabled) .Values.dashboards.traces.enabled }}
--- ---
apiVersion: v1 apiVersion: v1
kind: ConfigMap kind: ConfigMap
@ -28,12 +28,56 @@ data:
orgId: 1 orgId: 1
type: file type: file
{{- end }} {{- end }}
{{- if .Values.dashboards.metrics.enabled }}
- disableDeletion: true - disableDeletion: true
editable: false editable: false
folder: Alloy folder: Mimir
name: alloy-1 name: mimir-1
options: options:
path: /var/lib/grafana/dashboards/alloy-1 path: /var/lib/grafana/dashboards/mimir-1
orgId: 1
type: file
- disableDeletion: true
editable: false
folder: Mimir
name: mimir-2
options:
path: /var/lib/grafana/dashboards/mimir-2
orgId: 1
type: file
- disableDeletion: true
editable: false
folder: Mimir
name: mimir-3
options:
path: /var/lib/grafana/dashboards/mimir-3
orgId: 1
type: file
- disableDeletion: true
editable: false
folder: Mimir
name: mimir-4
options:
path: /var/lib/grafana/dashboards/mimir-4
orgId: 1
type: file
- disableDeletion: true
editable: false
folder: Mimir
name: mimir-5
options:
path: /var/lib/grafana/dashboards/mimir-5
orgId: 1 orgId: 1
type: file type: file
{{- end }} {{- end }}
{{- if .Values.dashboards.traces.enabled }}
- disableDeletion: true
editable: false
folder: Tempo
name: tempo-1
options:
path: /var/lib/grafana/dashboards/tempo-1
orgId: 1
type: file
{{- end }}
{{- end }}

View File

@ -1,4 +1,4 @@
{{- if .Values.local.grafana.enabled }} {{- if or (or .Values.local.logs.enabled .Values.local.metrics.enabled) .Values.local.traces.enabled }}
--- ---
apiVersion: v1 apiVersion: v1
kind: ConfigMap kind: ConfigMap
@ -32,7 +32,7 @@ data:
uid: loki_ds uid: loki_ds
# <string> Sets the data source's URL, including the # <string> Sets the data source's URL, including the
# port. # port.
url: http://{{- $.Release.Namespace -}}-loki-gateway.{{- $.Release.Namespace -}}.svc.cluster.local url: http://loki-gateway.{{- $.Release.Namespace -}}.svc.cluster.local
# <bool> Toggles whether the data source is pre-selected # <bool> Toggles whether the data source is pre-selected
# for new panels. You can set only one default # for new panels. You can set only one default
# data source per organization. # data source per organization.
@ -61,10 +61,6 @@ data:
# <bool> Allows users to edit data sources from the # <bool> Allows users to edit data sources from the
# Grafana UI. # Grafana UI.
editable: true editable: true
# Extra config.
jsonData:
# Scrape interval
timeInterval: 1m
{{- end }} {{- end }}
{{- if .Values.local.traces.enabled }} {{- if .Values.local.traces.enabled }}
- name: Tempo - name: Tempo

View File

@ -1,57 +0,0 @@
{{- if and .Values.local.grafana.enabled .Values.grafana.ingress.enabled -}}
{{- $ingressApiIsStable := eq (include "ingress.isStable" .) "true" -}}
{{- $ingressSupportsIngressClassName := eq (include "ingress.supportsIngressClassName" .) "true" -}}
{{- $ingressSupportsPathType := eq (include "ingress.supportsPathType" .) "true" -}}
apiVersion: {{ include "ingress.apiVersion" . }}
kind: Ingress
metadata:
name: grafana
namespace: {{ $.Release.Namespace }}
labels:
app: grafana
{{- range $labelKey, $labelValue := .Values.grafana.ingress.labels }}
{{ $labelKey }}: {{ $labelValue | toYaml }}
{{- end }}
{{- with .Values.grafana.ingress.annotations }}
annotations:
{{- toYaml . | nindent 4 }}
{{- end }}
spec:
{{- if and $ingressSupportsIngressClassName .Values.grafana.ingress.ingressClassName }}
ingressClassName: {{ .Values.grafana.ingress.ingressClassName }}
{{- end -}}
{{- if .Values.grafana.ingress.tls }}
tls:
{{- range .Values.grafana.ingress.tls }}
- hosts:
{{- range .hosts }}
- {{ tpl . $ | quote }}
{{- end }}
{{- with .secretName }}
secretName: {{ . }}
{{- end }}
{{- end }}
{{- end }}
rules:
{{- range .Values.grafana.ingress.hosts }}
- host: {{ tpl .host $ | quote }}
http:
paths:
{{- range .paths }}
- path: {{ .path }}
{{- if $ingressSupportsPathType }}
pathType: {{ .pathType }}
{{- end }}
backend:
{{- if $ingressApiIsStable }}
service:
name: grafana
port:
number: 3000
{{- else }}
serviceName: grafana
servicePort: 3000
{{- end }}
{{- end }}
{{- end }}
{{- end }}

View File

@ -1,12 +0,0 @@
{{- if .Values.local.grafana.enabled }}
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: grafana-pvc
spec:
accessModes:
- ReadWriteOnce
resources:
requests:
storage: 1Gi
{{- end }}

View File

@ -1,15 +0,0 @@
{{- if .Values.local.grafana.enabled }}
apiVersion: v1
kind: Service
metadata:
name: grafana
spec:
ports:
- port: 3000
protocol: TCP
targetPort: http-grafana
selector:
app: grafana
sessionAffinity: None
type: ClusterIP # Make this configurable
{{- end }}

View File

@ -1,4 +1,16 @@
{{- if .Values.local.grafana.enabled }} {{- if or (or .Values.local.logs.enabled .Values.local.metrics.enabled) .Values.local.traces.enabled }}
---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: grafana-pvc
spec:
accessModes:
- ReadWriteOnce
resources:
requests:
storage: 1Gi
---
apiVersion: apps/v1 apiVersion: apps/v1
kind: Deployment kind: Deployment
metadata: metadata:
@ -20,7 +32,7 @@ spec:
- 0 - 0
containers: containers:
- name: grafana - name: grafana
image: grafana/grafana:{{- .Values.grafana.version }} image: grafana/grafana:10.0.0
imagePullPolicy: IfNotPresent imagePullPolicy: IfNotPresent
ports: ports:
- containerPort: 3000 - containerPort: 3000
@ -53,7 +65,7 @@ spec:
name: grafana-pv name: grafana-pv
- mountPath: /etc/grafana/provisioning/datasources - mountPath: /etc/grafana/provisioning/datasources
name: datasources-provisioning name: datasources-provisioning
{{- if .Values.dashboards.logs.enabled }} {{- if or (or .Values.dashboards.logs.enabled .Values.dashboards.metrics.enabled) .Values.dashboards.traces.enabled }}
- mountPath: /etc/grafana/provisioning/dashboards - mountPath: /etc/grafana/provisioning/dashboards
name: dashboards-provisioning name: dashboards-provisioning
{{- end }} {{- end }}
@ -63,8 +75,22 @@ spec:
- mountPath: /var/lib/grafana/dashboards/loki-2 - mountPath: /var/lib/grafana/dashboards/loki-2
name: loki-dashboards-2 name: loki-dashboards-2
{{- end }} {{- end }}
- mountPath: /var/lib/grafana/dashboards/alloy-1 {{- if .Values.dashboards.metrics.enabled }}
name: alloy-dashboards-1 - mountPath: /var/lib/grafana/dashboards/mimir-1
name: mimir-dashboards-1
- mountPath: /var/lib/grafana/dashboards/mimir-2
name: mimir-dashboards-2
- mountPath: /var/lib/grafana/dashboards/mimir-3
name: mimir-dashboards-3
- mountPath: /var/lib/grafana/dashboards/mimir-4
name: mimir-dashboards-4
- mountPath: /var/lib/grafana/dashboards/mimir-5
name: mimir-dashboards-5
{{- end }}
{{- if .Values.dashboards.traces.enabled }}
- mountPath: /var/lib/grafana/dashboards/tempo-1
name: tempo-dashboards-1
{{- end }}
volumes: volumes:
- name: grafana-pv - name: grafana-pv
persistentVolumeClaim: persistentVolumeClaim:
@ -83,7 +109,41 @@ spec:
configMap: configMap:
name: loki-dashboards-2 name: loki-dashboards-2
{{- end }} {{- end }}
- name: alloy-dashboards-1 {{- if .Values.dashboards.metrics.enabled }}
- name: mimir-dashboards-1
configMap: configMap:
name: alloy-dashboards-1 name: mimir-dashboards-1
- name: mimir-dashboards-2
configMap:
name: mimir-dashboards-2
- name: mimir-dashboards-3
configMap:
name: mimir-dashboards-3
- name: mimir-dashboards-4
configMap:
name: mimir-dashboards-4
- name: mimir-dashboards-5
configMap:
name: mimir-dashboards-5
{{- end }}
{{- if .Values.dashboards.traces.enabled }}
- name: tempo-dashboards-1
configMap:
name: tempo-dashboards-1
{{- end }}
---
apiVersion: v1
kind: Service
metadata:
name: grafana
spec:
ports:
- port: 3000
protocol: TCP
targetPort: http-grafana
selector:
app: grafana
sessionAffinity: None
type: ClusterIP # Make this configurable
{{- end }} {{- end }}

View File

@ -1,4 +1,4 @@
{{- if and .Values.local.grafana.enabled .Values.dashboards.logs.enabled }} {{- if .Values.dashboards.logs.enabled }}
--- ---
apiVersion: v1 apiVersion: v1
kind: ConfigMap kind: ConfigMap
@ -12,6 +12,8 @@ data:
{{ $.Files.Get "src/dashboards/loki-deletion.json" | fromJson | toJson }} {{ $.Files.Get "src/dashboards/loki-deletion.json" | fromJson | toJson }}
"loki-logs.json": | "loki-logs.json": |
{{ $.Files.Get "src/dashboards/loki-logs.json" | fromJson | toJson }} {{ $.Files.Get "src/dashboards/loki-logs.json" | fromJson | toJson }}
"loki-mixin-recording-rules.json": |
{{ $.Files.Get "src/dashboards/loki-mixin-recording-rules.json" | fromJson | toJson }}
"loki-operational.json": | "loki-operational.json": |
{{ $.Files.Get "src/dashboards/loki-operational.json" | fromJson | toJson }} {{ $.Files.Get "src/dashboards/loki-operational.json" | fromJson | toJson }}
{{- end }} {{- end }}

View File

@ -1,4 +1,4 @@
{{- if and .Values.local.grafana.enabled .Values.dashboards.logs.enabled }} {{- if .Values.dashboards.logs.enabled }}
--- ---
apiVersion: v1 apiVersion: v1
kind: ConfigMap kind: ConfigMap

View File

@ -0,0 +1,19 @@
{{- if .Values.dashboards.metrics.enabled }}
---
apiVersion: v1
kind: ConfigMap
metadata:
name: mimir-dashboards-5
namespace: {{ $.Release.Namespace }}
data:
"mimir-tenants.json": |
{{ $.Files.Get "src/dashboards/mimir-tenants.json" | fromJson | toJson }}
"mimir-top-tenants.json": |
{{ $.Files.Get "src/dashboards/mimir-top-tenants.json" | fromJson | toJson }}
"mimir-writes-networking.json": |
{{ $.Files.Get "src/dashboards/mimir-writes-networking.json" | fromJson | toJson }}
"mimir-writes-resources.json": |
{{ $.Files.Get "src/dashboards/mimir-writes-resources.json" | fromJson | toJson }}
"mimir-writes.json": |
{{ $.Files.Get "src/dashboards/mimir-writes.json" | fromJson | toJson }}
{{- end }}

View File

@ -0,0 +1,19 @@
{{- if .Values.dashboards.metrics.enabled }}
---
apiVersion: v1
kind: ConfigMap
metadata:
name: mimir-dashboards-1
namespace: {{ $.Release.Namespace }}
data:
"mimir-alertmanager-resources.json": |
{{ $.Files.Get "src/dashboards/mimir-alertmanager-resources.json" | fromJson | toJson }}
"mimir-alertmanager.json": |
{{ $.Files.Get "src/dashboards/mimir-alertmanager.json" | fromJson | toJson }}
"mimir-compactor-resources.json": |
{{ $.Files.Get "src/dashboards/mimir-compactor-resources.json" | fromJson | toJson }}
"mimir-compactor.json": |
{{ $.Files.Get "src/dashboards/mimir-compactor.json" | fromJson | toJson }}
"mimir-config.json": |
{{ $.Files.Get "src/dashboards/mimir-config.json" | fromJson | toJson }}
{{- end }}

View File

@ -0,0 +1,19 @@
{{- if .Values.dashboards.metrics.enabled }}
---
apiVersion: v1
kind: ConfigMap
metadata:
name: mimir-dashboards-2
namespace: {{ $.Release.Namespace }}
data:
"mimir-object-store.json": |
{{ $.Files.Get "src/dashboards/mimir-object-store.json" | fromJson | toJson }}
"mimir-overrides.json": |
{{ $.Files.Get "src/dashboards/mimir-overrides.json" | fromJson | toJson }}
"mimir-overview-networking.json": |
{{ $.Files.Get "src/dashboards/mimir-overview-networking.json" | fromJson | toJson }}
"mimir-overview-resources.json": |
{{ $.Files.Get "src/dashboards/mimir-overview-resources.json" | fromJson | toJson }}
"mimir-overview.json": |
{{ $.Files.Get "src/dashboards/mimir-overview.json" | fromJson | toJson }}
{{- end }}

View File

@ -0,0 +1,19 @@
{{- if .Values.dashboards.metrics.enabled }}
---
apiVersion: v1
kind: ConfigMap
metadata:
name: mimir-dashboards-3
namespace: {{ $.Release.Namespace }}
data:
"mimir-queries.json": |
{{ $.Files.Get "src/dashboards/mimir-queries.json" | fromJson | toJson }}
"mimir-reads-networking.json": |
{{ $.Files.Get "src/dashboards/mimir-reads-networking.json" | fromJson | toJson }}
"mimir-reads-resources.json": |
{{ $.Files.Get "src/dashboards/mimir-reads-resources.json" | fromJson | toJson }}
"mimir-reads.json": |
{{ $.Files.Get "src/dashboards/mimir-reads.json" | fromJson | toJson }}
"mimir-remote-ruler-reads-resources.json": |
{{ $.Files.Get "src/dashboards/mimir-remote-ruler-reads-resources.json" | fromJson | toJson }}
{{- end }}

View File

@ -0,0 +1,19 @@
{{- if .Values.dashboards.metrics.enabled }}
---
apiVersion: v1
kind: ConfigMap
metadata:
name: mimir-dashboards-4
namespace: {{ $.Release.Namespace }}
data:
"mimir-remote-ruler-reads.json": |
{{ $.Files.Get "src/dashboards/mimir-remote-ruler-reads.json" | fromJson | toJson }}
"mimir-rollout-progress.json": |
{{ $.Files.Get "src/dashboards/mimir-rollout-progress.json" | fromJson | toJson }}
"mimir-ruler.json": |
{{ $.Files.Get "src/dashboards/mimir-ruler.json" | fromJson | toJson }}
"mimir-scaling.json": |
{{ $.Files.Get "src/dashboards/mimir-scaling.json" | fromJson | toJson }}
"mimir-slow-queries.json": |
{{ $.Files.Get "src/dashboards/mimir-slow-queries.json" | fromJson | toJson }}
{{- end }}

View File

@ -0,0 +1,21 @@
{{- if .Values.dashboards.traces.enabled }}
---
apiVersion: v1
kind: ConfigMap
metadata:
name: tempo-dashboards-1
namespace: {{ $.Release.Namespace }}
data:
"tempo-operational.json": |
{{ $.Files.Get "src/dashboards/tempo-operational.json" | fromJson | toJson }}
"tempo-reads.json": |
{{ $.Files.Get "src/dashboards/tempo-reads.json" | fromJson | toJson }}
"tempo-resources.json": |
{{ $.Files.Get "src/dashboards/tempo-resources.json" | fromJson | toJson }}
"tempo-rollout-progress.json": |
{{ $.Files.Get "src/dashboards/tempo-rollout-progress.json" | fromJson | toJson }}
"tempo-tenants.json": |
{{ $.Files.Get "src/dashboards/tempo-tenants.json" | fromJson | toJson }}
"tempo-writes.json": |
{{ $.Files.Get "src/dashboards/tempo-writes.json" | fromJson | toJson }}
{{- end }}

View File

@ -1,129 +0,0 @@
{{- if .Values.local.grafana.enabled }}
{{- if and .Values.local.grafana.enabled .Values.dashboards.logs.enabled }}
apiVersion: apps/v1
kind: Deployment
metadata:
name: {{ $.Release.Namespace }}-mimir-ruler-for-dashboards
namespace: {{ $.Release.Namespace }}
spec:
progressDeadlineSeconds: 600
replicas: 1
revisionHistoryLimit: 10
selector:
matchLabels:
app.kubernetes.io/component: ruler-for-dashboards
app.kubernetes.io/instance: meta
app.kubernetes.io/name: mimir
strategy:
rollingUpdate:
maxSurge: 50%
maxUnavailable: 0
type: RollingUpdate
template:
metadata:
labels:
app.kubernetes.io/component: ruler-for-dashboards
app.kubernetes.io/instance: meta
app.kubernetes.io/name: mimir
namespace: {{ $.Release.Namespace }}
spec:
containers:
- args:
- -target=ruler
- -log.level=debug
- -ruler-storage.backend=local
- -ruler-storage.local.directory=/etc/rules
- -ruler.ring.prefix=dashboards/
- -config.expand-env=true
- -config.file=/etc/mimir/mimir.yaml
image: grafana/mimir:2.8.0
imagePullPolicy: IfNotPresent
name: ruler
ports:
- containerPort: 8080
name: http-metrics
protocol: TCP
- containerPort: 9095
name: grpc
protocol: TCP
- containerPort: 7946
name: memberlist
protocol: TCP
envFrom:
- secretRef:
name: minio
readinessProbe:
failureThreshold: 3
httpGet:
path: /ready
port: http-metrics
scheme: HTTP
initialDelaySeconds: 45
periodSeconds: 10
successThreshold: 1
timeoutSeconds: 1
resources:
requests:
cpu: 100m
memory: 128Mi
securityContext:
allowPrivilegeEscalation: false
capabilities:
drop:
- ALL
readOnlyRootFilesystem: true
terminationMessagePath: /dev/termination-log
terminationMessagePolicy: File
volumeMounts:
- mountPath: /etc/mimir
name: config
- mountPath: /var/mimir
name: runtime-config
- mountPath: /data
name: storage
- mountPath: /active-query-tracker
name: active-queries
- mountPath: /etc/rules/anonymous
name: rules
dnsPolicy: ClusterFirst
restartPolicy: Always
schedulerName: default-scheduler
securityContext:
fsGroup: 10001
runAsGroup: 10001
runAsNonRoot: true
runAsUser: 10001
seccompProfile:
type: RuntimeDefault
terminationGracePeriodSeconds: 180
topologySpreadConstraints:
- labelSelector:
matchLabels:
app.kubernetes.io/component: ruler
app.kubernetes.io/instance: meta
app.kubernetes.io/name: mimir
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
volumes:
- configMap:
defaultMode: 420
items:
- key: mimir.yaml
path: mimir.yaml
name: {{ $.Release.Namespace }}-mimir-config
name: config
- configMap:
defaultMode: 420
name: {{ $.Release.Namespace }}-mimir-runtime
name: runtime-config
- emptyDir: {}
name: storage
- emptyDir: {}
name: active-queries
- configMap:
defaultMode: 420
name: rules
name: rules
{{- end }}
{{- end }}

View File

@ -1,14 +0,0 @@
{{- if .Values.local.metrics.enabled }}
{{- if and .Values.local.grafana.enabled .Values.dashboards.logs.enabled }}
---
apiVersion: v1
kind: ConfigMap
metadata:
name: rules
namespace: {{ $.Release.Namespace }}
data:
{{- if .Values.dashboards.logs.enabled }}
{{ ($.Files.Glob "src/rules/loki-rules.yaml").AsConfig | indent 2 }}
{{- end }}
{{- end }}
{{- end }}

View File

@ -3,20 +3,20 @@
{{- end -}} {{- end -}}
{{- if eq .Values.cloud.logs.enabled true -}} {{- if eq .Values.cloud.logs.enabled true -}}
{{- if empty .Values.cloud.logs.secret -}} {{- if or (empty .Values.cloud.logs.endpoint) (or (empty .Values.cloud.logs.username) (empty .Values.cloud.logs.password)) -}}
{{- fail "if cloud.logs is enabled then the secret has to be filled in" -}} {{- fail "if cloud.logs is enabled then the endpoint, username and password have to be filled in" -}}
{{- end -}} {{- end -}}
{{- end -}} {{- end -}}
{{- if eq .Values.cloud.metrics.enabled true -}} {{- if eq .Values.cloud.metrics.enabled true -}}
{{- if empty .Values.cloud.metrics.secret -}} {{- if or (empty .Values.cloud.metrics.endpoint) (or (empty .Values.cloud.metrics.username) (empty .Values.cloud.metrics.password)) -}}
{{- fail "if cloud.metrics is enabled then the secret has to be filled in" -}} {{- fail "if cloud.metrics is enabled then the endpoint, username and password have to be filled in" -}}
{{- end -}} {{- end -}}
{{- end -}} {{- end -}}
{{- if eq .Values.cloud.traces.enabled true -}} {{- if eq .Values.cloud.traces.enabled true -}}
{{- if empty .Values.cloud.traces.secret -}} {{- if or (empty .Values.cloud.traces.endpoint) (or (empty .Values.cloud.traces.username) (empty .Values.cloud.traces.password)) -}}
{{- fail "if cloud.traces is enabled then the secret has to be filled in" -}} {{- fail "if cloud.traces is enabled then the endpoint, username and password have to be filled in" -}}
{{- end -}} {{- end -}}
{{- end -}} {{- end -}}
@ -34,6 +34,6 @@
{{- end -}} {{- end -}}
{{- end -}} {{- end -}}
{{- if empty .Values.metrics.retain -}} {{- if empty .Values.namespacesToMonitor -}}
{{- fail "All metrics will be collected, please specify some in metrics.retain" -}} {{- fail "No namespaces have been specified in namespacesToMonitor" -}}
{{- end -}} {{- end -}}

View File

@ -1,25 +1,10 @@
# Specify the namespaces to monitor here namespacesToMonitor:
# By default the chart will monitor the namespace it is installed in - loki
# namespacesToMonitor: - mimir
# - loki - tempo
# The name of the cluster where this will be installed clusterName: "meta-monitoring" # TODO check if this can be derived
clusterLabelValue: "meta"
# Set to true to write logs, metrics or traces to Grafana Cloud
# The secrets have to be created first
cloud:
logs:
enabled: true
secret: "logs"
metrics:
enabled: true
secret: "metrics"
traces:
enabled: true
secret: "traces"
# Set to true for a local version of logs, metrics or traces
local: local:
grafana:
enabled: false
logs: logs:
enabled: false enabled: false
metrics: metrics:
@ -28,262 +13,47 @@ local:
enabled: false enabled: false
minio: minio:
enabled: false # This should be set to true if any of the previous is enabled enabled: false # This should be set to true if any of the previous is enabled
grafana:
version: 11.4.3 cloud:
# Gateway ingress configuration
ingress:
# -- Specifies whether an ingress for the gateway should be created
enabled: true
# -- Ingress Class Name. MAY be required for Kubernetes versions >= 1.18
ingressClassName: ""
# -- Annotations for the gateway ingress
annotations: {}
# -- Labels for the gateway ingress
labels: {}
# -- Hosts configuration for the gateway ingress, passed through the `tpl` function to allow templating
hosts:
- host: monitoring.example.com
paths:
- path: /
# -- pathType (e.g. ImplementationSpecific, Prefix, .. etc.) might also be required by some Ingress Controllers
pathType: Prefix
# backend:
# service:
# name: TODO
# port:
# number: TODO
# -- TLS configuration for the gateway ingress. Hosts passed through the `tpl` function to allow templating
#tls:
# - secretName: grafana-tls
# hosts:
# - monitoring.example.com
logs: logs:
# Adding regexes here will add a stage.replace block for logs. For more information see enabled: true
endpoint:
username:
password:
metrics:
enabled: true
endpoint:
username:
password:
traces:
enabled: true
endpoint:
username:
password:
# Adding regexes here will add a stage.replace block. For more information see
# https://grafana.com/docs/agent/latest/flow/reference/components/loki.process/#stagereplace-block # https://grafana.com/docs/agent/latest/flow/reference/components/loki.process/#stagereplace-block
piiRegexes: null # This example replaces the word after password with ***** logs:
piiRegexes:
# This example replaces the word after password with *****
# - expression: "password (\\\\S+)" # - expression: "password (\\\\S+)"
# source: "" # Empty uses the log message # source: "" # Empty uses the log message
# replace: "*****"" # replace: "*****""
# The lines matching these will be kept in Loki
retain: # Set enabled = true to add the default logs/metrics/traces dashboards to the local Grafana
# This shows the queries
- executing query
- caller=metrics.go
# This shows any errors
- level=error
- level=warn
# Log lines for delete requests
- delete request for user added
- Started processing delete request
- delete request for user marked as processed
# This shows the ingest requests and is very noisy. Uncomment to include.
# - caller=push.go
# Additional log lines to retain
extraLogs: []
metrics:
# The list of metrics to retain for logging dashboards
retain:
- alloy_build_info
- alloy_config_last_load_success_timestamp_seconds
- alloy_config_last_load_successful
- alloy_config_load_failures_total
- alloy_component_controller_evaluating
- alloy_component_dependencies_wait_seconds
- alloy_component_dependencies_wait_seconds_bucket
- alloy_component_evaluation_seconds
- alloy_component_evaluation_seconds_bucket
- alloy_component_evaluation_seconds_count
- alloy_component_evaluation_seconds_sum
- alloy_component_evaluation_slow_seconds
- alloy_component_controller_running_components
- alloy_resources_machine_rx_bytes_total
- alloy_resources_machine_tx_bytes_total
- alloy_resources_process_cpu_seconds_total
- alloy_resources_process_resident_memory_bytes
- prometheus_remote_write_wal_samples_appended_total
- prometheus_remote_write_wal_storage_active_series
- cluster_node_info
- cluster_node_lamport_time
- cluster_node_update_observers
- cluster_node_gossip_health_score
- cluster_node_gossip_proto_version
- cluster_node_gossip_received_events_total
- cluster_node_peers
- cluster_transport_rx_bytes_total
- cluster_transport_rx_packets_total
- cluster_transport_rx_packets_failed_total
- cluster_transport_stream_rx_bytes_total
- cluster_transport_stream_rx_packets_failed_total
- cluster_transport_stream_rx_packets_total
- cluster_transport_stream_tx_bytes_total
- cluster_transport_stream_tx_packets_total
- cluster_transport_stream_tx_packets_failed_total
- cluster_transport_streams
- cluster_transport_tx_packets_total
- cluster_transport_tx_packets_failed_total
- cluster_transport_rx_packet_queue_length
- cluster_transport_tx_packet_queue_length
- container_cpu_usage_seconds_total
- container_fs_writes_bytes_total
- container_memory_working_set_bytes
- container_network_receive_bytes_total
- container_network_transmit_bytes_total
- container_spec_cpu_period
- container_spec_cpu_quota
- container_spec_memory_limit_bytes
- cortex_ingester_flush_queue_length
- cortex_prometheus_rule_group_iterations_total
- cortex_prometheus_rule_evaluation_failures_total
- cortex_prometheus_rule_group_rules
- cortex_prometheus_rule_group_last_duration_seconds
- cortex_prometheus_rule_group_last_evaluation_timestamp_seconds
- cortex_prometheus_rule_group_iterations_missed_total
- exporter_send_failed_spans_ratio_total
- exporter_sent_spans_ratio_total
- go_gc_duration_seconds
- go_gc_duration_seconds_count
- go_goroutines
- go_memstats_heap_inuse_bytes
- kubelet_volume_stats_used_bytes
- kubelet_volume_stats_capacity_bytes
- kube_deployment_created
- kube_persistentvolumeclaim_labels
- kube_pod_container_info
- kube_pod_container_resource_requests
- kube_pod_container_status_last_terminated_reason
- kube_pod_container_status_restarts_total
- loki_azure_blob_request_duration_seconds_bucket
- loki_boltdb_shipper_compact_tables_operation_duration_seconds
- loki_boltdb_shipper_compact_tables_operation_last_successful_run_timestamp_seconds
- loki_boltdb_shipper_retention_marker_count_total
- loki_boltdb_shipper_retention_marker_table_processed_duration_seconds_bucket
- loki_boltdb_shipper_retention_marker_table_processed_duration_seconds_count
- loki_boltdb_shipper_retention_marker_table_processed_duration_seconds_sum
- loki_boltdb_shipper_retention_marker_table_processed_total
- loki_boltdb_shipper_request_duration_seconds_bucket
- loki_boltdb_shipper_request_duration_seconds_count
- loki_boltdb_shipper_request_duration_seconds_sum
- loki_boltdb_shipper_retention_sweeper_chunk_deleted_duration_seconds_bucket
- loki_boltdb_shipper_retention_sweeper_chunk_deleted_duration_seconds_count
- loki_boltdb_shipper_retention_sweeper_chunk_deleted_duration_seconds_sum
- loki_boltdb_shipper_retention_sweeper_marker_files_current
- loki_boltdb_shipper_retention_sweeper_marker_file_processing_current_time
- loki_build_info
- loki_chunk_store_deduped_chunks_total
- loki_chunk_store_index_entries_per_chunk_bucket
- loki_chunk_store_index_entries_per_chunk_count
- loki_chunk_store_index_entries_per_chunk_sum
- loki_compactor_delete_requests_processed_total
- loki_compactor_delete_requests_received_total
- loki_compactor_deleted_lines
- loki_compactor_oldest_pending_delete_request_age_seconds
- loki_compactor_pending_delete_requests_count
- loki_consul_request_duration_seconds_bucket
- loki_discarded_samples_total
- loki_discarded_bytes_total
- loki_distributor_bytes_received_total
- loki_distributor_lines_received_total
- loki_distributor_structured_metadata_bytes_received_total
- loki_gcs_request_duration_seconds_bucket
- loki_gcs_request_duration_seconds_count
- loki_index_request_duration_seconds_bucket
- loki_index_request_duration_seconds_count
- loki_ingester_chunk_age_seconds_bucket
- loki_ingester_chunk_age_seconds_count
- loki_ingester_chunk_age_seconds_sum
- loki_ingester_chunk_bounds_hours_bucket
- loki_ingester_chunk_bounds_hours_count
- loki_ingester_chunk_bounds_hours_sum
- loki_ingester_chunk_entries_bucket
- loki_ingester_chunk_entries_count
- loki_ingester_chunk_entries_sum
- loki_ingester_chunk_size_bytes_bucket
- loki_ingester_chunk_utilization_bucket
- loki_ingester_chunk_utilization_count
- loki_ingester_chunk_utilization_sum
- loki_ingester_chunks_flushed_total
- loki_ingester_flush_queue_length
- loki_ingester_memory_chunks
- loki_ingester_memory_streams
- loki_ingester_streams_created_total
- loki_request_duration_seconds_bucket
- loki_request_duration_seconds_count
- loki_request_duration_seconds_sum
- loki_ruler_wal_appender_ready
- loki_ruler_wal_disk_size
- loki_ruler_wal_prometheus_remote_storage_highest_timestamp_in_seconds
- loki_ruler_wal_prometheus_remote_storage_queue_highest_sent_timestamp_seconds
- loki_ruler_wal_prometheus_remote_storage_samples_pending
- loki_ruler_wal_prometheus_remote_storage_samples_total
- loki_ruler_wal_samples_appended_total
- loki_ruler_wal_storage_created_series_total
- loki_s3_request_duration_seconds_bucket
- loki_s3_request_duration_seconds_count
- loki_write_batch_retries_total
- loki_write_dropped_bytes_total
- loki_write_dropped_entries_total
- loki_write_sent_bytes_total
- loki_write_sent_entries_total
- node_disk_read_bytes_total
- node_disk_written_bytes_total
- process_start_time_seconds
- processor_batch_batch_send_size_ratio_bucket
- processor_batch_metadata_cardinality_ratio
- processor_batch_timeout_trigger_send_ratio_total
- prometheus_remote_storage_bytes_total
- prometheus_remote_storage_enqueue_retries_total
- prometheus_remote_storage_highest_timestamp_in_seconds
- prometheus_remote_storage_metadata_bytes_total
- prometheus_remote_storage_queue_highest_sent_timestamp_seconds
- prometheus_remote_storage_samples_dropped_total
- prometheus_remote_storage_samples_failed_total
- prometheus_remote_storage_samples_pending
- prometheus_remote_storage_samples_retried_total
- prometheus_remote_storage_samples_total
- prometheus_remote_storage_sent_batch_duration_seconds_bucket
- prometheus_remote_storage_sent_batch_duration_seconds_count
- prometheus_remote_storage_sent_batch_duration_seconds_sum
- prometheus_remote_storage_shard_capacity
- prometheus_remote_storage_shards
- prometheus_remote_storage_shards_desired
- prometheus_remote_storage_shards_max
- prometheus_remote_storage_shards_min
- prometheus_remote_storage_succeeded_samples_total
- prometheus_remote_write_wal_samples_appended_total
- prometheus_remote_write_wal_storage_active_series
- prometheus_sd_discovered_targets
- prometheus_target_interval_length_seconds_count
- prometheus_target_interval_length_seconds_sum
- prometheus_target_scrapes_exceeded_sample_limit_total
- prometheus_target_scrapes_sample_duplicate_timestamp_total
- prometheus_target_scrapes_sample_out_of_bounds_total
- prometheus_target_scrapes_sample_out_of_order_total
- prometheus_target_sync_length_seconds_sum
- prometheus_wal_watcher_current_segment
- promtail_custom_bad_words_total
- promtail_dropped_bytes_total
- promtail_files_active_total
- promtail_read_bytes_total
- promtail_read_lines_total
- promtail_request_duration_seconds_bucket
- promtail_sent_entries_total
- rpc_server_duration_milliseconds_bucket
- receiver_accepted_spans_ratio_total
- receiver_refused_spans_ratio_total
- scrape_duration_seconds
- traces_exporter_sent_spans
- traces_exporter_send_failed_spans
- traces_loadbalancer_backend_outcome
- traces_loadbalancer_num_backends
- traces_receiver_accepted_spans
- traces_receiver_refused_spans
- up
# Additional metrics to retain
extraMetrics: []
# Set enabled = true to add the default logs dashboards to the local Grafana
dashboards: dashboards:
logs: logs:
enabled: true enabled: true
metrics:
enabled: true
traces:
enabled: true
global:
minio:
rootUser: "rootuser"
rootPassword: "rootpassword"
kubeStateMetrics: kubeStateMetrics:
# Scrape https://github.com/kubernetes/kube-state-metrics by default # Scrape https://github.com/kubernetes/kube-state-metrics by default
enabled: true enabled: true
@ -291,44 +61,23 @@ kubeStateMetrics:
# https://artifacthub.io/packages/helm/prometheus-community/kube-state-metrics/ # https://artifacthub.io/packages/helm/prometheus-community/kube-state-metrics/
# is used. Change this if kube-state-metrics is installed somewhere else. # is used. Change this if kube-state-metrics is installed somewhere else.
endpoint: kube-state-metrics.kube-state-metrics.svc.cluster.local:8080 endpoint: kube-state-metrics.kube-state-metrics.svc.cluster.local:8080
# The following are configuration for the dependencies. # The following are configuration for the dependencies.
# These should usually not be changed. # These should not be changed.
loki: loki:
loki: loki:
auth_enabled: false auth_enabled: false
schemaConfig:
configs:
- from: 2024-03-29
store: tsdb
object_store: s3
schema: v13
index:
prefix: index_
period: 24h
storage: storage:
type: "s3" type: "s3"
s3: s3:
endpoint: "meta-minio.meta.svc:9000"
access_key_id: rootuser
secret_access_key: rootpassword
insecure: true insecure: true
s3ForcePathStyle: true
bucketNames: bucketNames:
chunks: loki-chunks chunks: loki-chunks
ruler: loki-ruler ruler: loki-ruler
structuredConfig:
common:
storage:
s3:
access_key_id: "${rootUser}"
endpoint: "{{ .Release.Name }}-minio.{{ .Release.Namespace }}.svc:9000"
secret_access_key: "${rootPassword}"
compactor:
retention_enabled: true
delete_request_store: s3
limits_config:
retention_period: 30d
lokiCanary:
enabled: false
test:
enabled: false
monitoring: monitoring:
dashboards: dashboards:
enabled: false enabled: false
@ -342,85 +91,44 @@ loki:
installOperator: false installOperator: false
lokiCanary: lokiCanary:
enabled: false enabled: false
write: test:
extraArgs: enabled: false
- "-config.expand-env=true"
extraEnvFrom: grafana-agent:
- secretRef: agent:
name: "minio"
read:
extraArgs:
- "-config.expand-env=true"
extraEnvFrom:
- secretRef:
name: "minio"
backend:
extraArgs:
- "-config.expand-env=true"
extraEnvFrom:
- secretRef:
name: "minio"
alloy:
alloy:
clustering:
enabled: true
configMap: configMap:
create: false create: false
name: "agent-configmap" name: "agent-configmap"
key: 'config.river' key: 'config.river'
resources:
requests:
cpu: '1000m'
memory: '600Mi'
limits:
memory: '4Gi'
extraPorts:
- name: "otel"
port: 4317
targetPort: 4317
protocol: "TCP"
- name: "thrifthttp"
port: 14268
targetPort: 14268
protocol: "TCP"
controller:
type: "statefulset"
autoscaling:
enabled: true
minReplicas: 3
maxReplicas: 30
targetMemoryUtilizationPercentage: 90
targetCPUUtilizationPercentage: 90
mimir-distributed: mimir-distributed:
minio: minio:
enabled: false enabled: false
global:
extraEnvFrom:
- secretRef:
name: "minio"
mimir: mimir:
structuredConfig: structuredConfig:
alertmanager_storage: alertmanager_storage:
s3: s3:
bucket_name: mimir-ruler bucket_name: mimir-ruler
access_key_id: "{{ .Values.global.minio.rootUser }}"
endpoint: "{{ .Release.Name }}-minio.{{ .Release.Namespace }}.svc:9000"
secret_access_key: "{{ .Values.global.minio.rootPassword }}"
insecure: true
blocks_storage: blocks_storage:
backend: s3 backend: s3
s3: s3:
bucket_name: mimir-tsdb bucket_name: mimir-tsdb
access_key_id: "{{ .Values.global.minio.rootUser }}"
endpoint: "{{ .Release.Name }}-minio.{{ .Release.Namespace }}.svc:9000"
secret_access_key: "{{ .Values.global.minio.rootPassword }}"
insecure: true
ruler_storage: ruler_storage:
s3: s3:
bucket_name: mimir-ruler bucket_name: mimir-ruler
common: access_key_id: "{{ .Values.global.minio.rootUser }}"
storage:
backend: s3
s3:
bucket_name: mimir-ruler
access_key_id: "${rootUser}"
endpoint: "{{ .Release.Name }}-minio.{{ .Release.Namespace }}.svc:9000" endpoint: "{{ .Release.Name }}-minio.{{ .Release.Namespace }}.svc:9000"
secret_access_key: "${rootPassword}" secret_access_key: "{{ .Values.global.minio.rootPassword }}"
insecure: true insecure: true
limits:
compactor_blocks_retention_period: 30d
tempo-distributed: tempo-distributed:
tempo: tempo:
structuredConfig: structuredConfig:
@ -430,47 +138,19 @@ tempo-distributed:
s3: s3:
bucket: tempo bucket: tempo
endpoint: "{{ .Release.Name }}-minio.{{ .Release.Namespace }}.svc:9000" endpoint: "{{ .Release.Name }}-minio.{{ .Release.Namespace }}.svc:9000"
access_key: "${rootUser}" access_key: "{{ .Values.global.minio.rootUser }}"
secret_key: "${rootPassword}" secret_key: "{{ .Values.global.minio.rootPassword }}"
insecure: true insecure: true
distributor:
extraArgs:
- "-config.expand-env=true"
extraEnvFrom:
- secretRef:
name: "minio"
ingester:
extraArgs:
- "-config.expand-env=true"
extraEnvFrom:
- secretRef:
name: "minio"
compactor:
extraArgs:
- "-config.expand-env=true"
extraEnvFrom:
- secretRef:
name: "minio"
querier:
extraArgs:
- "-config.expand-env=true"
extraEnvFrom:
- secretRef:
name: "minio"
queryFrontend:
extraArgs:
- "-config.expand-env=true"
extraEnvFrom:
- secretRef:
name: "minio"
traces: traces:
otlp: otlp:
http: http:
enabled: true enabled: true
grpc: grpc:
enabled: true enabled: true
minio: minio:
existingSecret: "minio" rootUser: rootuser
rootPassword: rootpassword
buckets: buckets:
- name: loki-chunks - name: loki-chunks
policy: none policy: none

Some files were not shown because too many files have changed in this diff Show More