Compare commits


1 Commit

Author: MichelHollands
SHA1: 4cce0cb4bc
Message: Update Mimir Distributed
Date: 2024-04-16 08:02:25 +00:00
33 changed files with 2414 additions and 6016 deletions

View File

@@ -1,3 +0,0 @@
owner: grafana
git-repo: helm-charts
skip-existing: true

View File

@@ -1,15 +0,0 @@
## Reference: https://github.com/helm/chart-testing/blob/master/doc/ct_lint-and-install.md
remote: origin
target-branch: main
chart-dirs:
- charts
chart-repos:
- grafana=https://grafana.github.io/helm-charts
- minio=https://charts.min.io
validate-chart-schema: true
validate-maintainers: true
validate-yaml: true
exclude-deprecated: true
excluded-charts: []
namespace: meta-monitoring # Need to set the namespace because we create the secret there
release-label: app.kubernetes.io/instance

View File

@@ -5,7 +5,7 @@ on:
workflow_dispatch:
schedule:
# Run once a day
- cron: '0 7 * * *'
- cron: '0 0 * * *'
permissions:
contents: "write"
@@ -47,6 +47,7 @@ jobs:
labels: dependencies
branch: chore/update-loki
delete-branch: true
team-reviewers: loki-team
updateGrafanaAlloy:
name: Update the Grafana Alloy subchart
@@ -79,6 +80,7 @@ jobs:
labels: dependencies
branch: chore/update-grafana-alloy
delete-branch: true
team-reviewers: loki-team
updateMimirDistributed:
name: Update the Mimir Distributed subchart
@@ -111,6 +113,7 @@ jobs:
labels: dependencies
branch: chore/update-mimir-distributed
delete-branch: true
team-reviewers: loki-team
updateTempoDistributed:
name: Update the Tempo Distributed subchart
@@ -143,6 +146,7 @@ jobs:
labels: dependencies
branch: chore/update-tempo-distributed
delete-branch: true
team-reviewers: loki-team
updateMinio:
name: Update the Minio subchart
@@ -175,3 +179,4 @@ jobs:
labels: dependencies
branch: chore/update-minio
delete-branch: true
team-reviewers: loki-team

View File

@@ -1,175 +0,0 @@
name: Release Helm chart
on:
workflow_dispatch:
env:
CR_CONFIGFILE: "${{ github.workspace }}/source/.github/configs/cr.yaml"
CT_CONFIGFILE: "${{ github.workspace }}/source/.github/configs/ct.yaml"
CR_INDEX_PATH: "${{ github.workspace }}/.cr-index"
CR_PACKAGE_PATH: "${{ github.workspace }}/.cr-release-packages"
CR_TOOL_PATH: "${{ github.workspace }}/.cr-tool"
CR_VERSION: "1.5.0"
jobs:
setup:
runs-on: ubuntu-latest
outputs:
changed: ${{ steps.list-changed.outputs.changed }}
chartpath: ${{ steps.list-changed.outputs.chartpath }}
steps:
- name: Checkout
uses: actions/checkout@v4
with:
fetch-depth: 0
path: source
- name: Install chart-testing
uses: helm/chart-testing-action@v2
- name: List changed charts
id: list-changed
run: |
cd source
latest_tag=$( if ! git describe --tags --abbrev=0 --match='helm-chart/*' 2> /dev/null ; then git rev-list --max-parents=0 --first-parent HEAD; fi )
echo "Running: ct list-changed --config ${CT_CONFIGFILE} --since ${latest_tag} --target-branch ${{ github.ref_name }}"
changed=$(ct list-changed --config "${CT_CONFIGFILE}" --since "${latest_tag}" --target-branch "${{ github.ref_name }}")
echo "${changed}"
num_changed=$(wc -l <<< "${changed}")  # quote so newlines are preserved and multiple changed charts are counted
if [[ "${num_changed}" -gt "1" ]] ; then
echo "More than one chart changed, exiting"
exit 1
fi
if [[ -n "${changed}" ]]; then
name=$(yq ".name" < ${changed}/Chart.yaml)
version=$(yq ".version" < ${changed}/Chart.yaml)
tagname="v${version}"
if [ -n "$(git tag -l "${tagname}")" ]; then
echo "Tag ${tagname} already exists, skipping release"
echo "changed=false" >> $GITHUB_OUTPUT
else
echo "Releasing ${changed}"
echo "changed=true" >> $GITHUB_OUTPUT
echo "chartpath=${changed}" >> $GITHUB_OUTPUT
fi
else
echo "No charts have changed, skipping release"
echo "changed=false" >> $GITHUB_OUTPUT
fi
release:
needs: [setup]
runs-on: ubuntu-latest
if: needs.setup.outputs.changed == 'true'
permissions:
contents: write
id-token: write
steps:
- id: get-secrets
uses: grafana/shared-workflows/actions/get-vault-secrets@main
with:
# Secrets placed in the ci/repo/grafana/<repo>/<path> path in Vault
repo_secrets: |
APP_ID=github-app:app-id
PRIVATE_KEY=github-app:private-key
- uses: actions/create-github-app-token@v1
id: app-token
with:
app-id: ${{ env.APP_ID }}
private-key: ${{ env.PRIVATE_KEY }}
owner: ${{ github.repository_owner }}
- name: Checkout
uses: actions/checkout@v4
with:
fetch-depth: 0
path: source
- name: Configure Git
run: |
cd source
git config user.name "$GITHUB_ACTOR"
git config user.email "$GITHUB_ACTOR@users.noreply.github.com"
- name: Checkout helm-charts
uses: actions/checkout@v4
with:
fetch-depth: 0
repository: grafana/helm-charts
path: helm-charts
token: "${{ steps.app-token.outputs.token }}"
- name: Configure Git for helm-charts
run: |
cd helm-charts
git config user.name "$GITHUB_ACTOR"
git config user.email "$GITHUB_ACTOR@users.noreply.github.com"
- name: Set up Helm
uses: azure/setup-helm@v4
- name: Parse Chart.yaml
id: parse-chart
run: |
cd source
changed="${{ needs.setup.outputs.chartpath }}"
description=$(yq ".description" < ${changed}/Chart.yaml)
name=$(yq ".name" < ${changed}/Chart.yaml)
version=$(yq ".version" < ${changed}/Chart.yaml)
echo "chartpath=${changed}" >> $GITHUB_OUTPUT
echo "desc=${description}" >> $GITHUB_OUTPUT
echo "tagname=v${version}" >> $GITHUB_OUTPUT
echo "packagename=${name}-${version}" >> $GITHUB_OUTPUT
- name: Install CR tool
run: |
mkdir "${CR_TOOL_PATH}"
mkdir "${CR_PACKAGE_PATH}"
mkdir "${CR_INDEX_PATH}"
curl -sSLo cr.tar.gz "https://github.com/helm/chart-releaser/releases/download/v${CR_VERSION}/chart-releaser_${CR_VERSION}_linux_amd64.tar.gz"
tar -xzf cr.tar.gz -C "${CR_TOOL_PATH}"
rm -f cr.tar.gz
- name: Create Helm package
run: |
cd source
helm repo add grafana https://grafana.github.io/helm-charts
helm repo add minio https://charts.min.io
"${CR_TOOL_PATH}/cr" package "${{ steps.parse-chart.outputs.chartpath }}" --config "${CR_CONFIGFILE}" --package-path "${CR_PACKAGE_PATH}"
- name: Make a release on this repo
uses: softprops/action-gh-release@v2
with:
name: ${{ steps.parse-chart.outputs.tagname }}
repository: grafana/meta-monitoring-chart
tag_name: ${{ steps.parse-chart.outputs.tagname }}
token: ${{ steps.app-token.outputs.token }}
generate_release_notes: true
files: |
${{ env.CR_PACKAGE_PATH }}/${{ steps.parse-chart.outputs.packagename }}.tgz
# Note that this creates a release in grafana/helm-charts with a new tag.
# The tag name in grafana/helm-charts is <package>-<version>, while the
# tag name for grafana/meta-monitoring-chart is <version>.
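# For example, with a hypothetical chart version 0.0.1 and chart name
# meta-monitoring, the parse-chart step above would yield a
# grafana/meta-monitoring-chart tag of v0.0.1 (tagname) and a
# grafana/helm-charts tag of meta-monitoring-0.0.1 (packagename).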
- name: Make release on Helm Charts
uses: softprops/action-gh-release@v2
with:
name: ${{ steps.parse-chart.outputs.packagename }}
repository: grafana/helm-charts
tag_name: ${{ steps.parse-chart.outputs.packagename }}
token: ${{ steps.app-token.outputs.token }}
body: |
${{ steps.parse-chart.outputs.desc }}
Source commit: https://github.com/${{ github.repository }}/commit/${{ github.sha }}
Tag on source: https://github.com/${{ github.repository }}/releases/tag/${{ steps.parse-chart.outputs.tagname }}
files: |
${{ env.CR_PACKAGE_PATH }}/${{ steps.parse-chart.outputs.packagename }}.tgz
- name: Update helm-charts index.yaml
run: |
cd helm-charts
"${CR_TOOL_PATH}/cr" index --config "${CR_CONFIGFILE}" --token "${{ steps.app-token.outputs.token }}" --index-path "${CR_INDEX_PATH}" --package-path "${CR_PACKAGE_PATH}" --push

.gitignore (vendored)
View File

@@ -1 +0,0 @@
.DS_Store

View File

@@ -1,9 +1,20 @@
# meta-monitoring-chart
This is a meta-monitoring chart for Loki.
This is a meta-monitoring chart for GEL, GEM and GET. It should be installed in a
separate namespace next to GEM, GEL or GET installations.
Note that this is pre-production software at the moment.
## Preparation
Create a values.yaml file based on the [default one](../charts/meta-monitoring/values.yaml); a minimal sketch follows this list.
1. Add or remove the namespaces to monitor in the `namespacesToMonitor` setting.
1. Set the cluster name in the `clusterName` setting. This will be added as a label to all logs, metrics and traces.
1. Create a `meta` namespace.
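A minimal values.yaml sketch, assuming `namespacesToMonitor` takes a list of namespace names (the namespaces and cluster name below are placeholders):

```
namespacesToMonitor:
  - loki
  - mimir
clusterName: my-cluster
```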
## Local and cloud modes
The chart has 2 modes: local and cloud. In the local mode logs, metrics and/or traces are sent
@@ -23,6 +34,12 @@ Both modes can be enabled at the same time.
## Installation
```
helm install -n meta --skip-crds -f values.yaml meta ./charts/meta-monitoring
```
If the platform supports CRDs, the `--skip-crds` option can be removed. However, the CRDs are not used by this chart.
For more instructions, including how to update the chart, go to the [installation](docs/installation.md) page.
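A hedged sketch of an in-place upgrade, assuming the release name `meta` used in the install command above (see the installation page for the authoritative steps):

```
helm upgrade -n meta -f values.yaml meta ./charts/meta-monitoring
```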
## Supported features
@@ -42,6 +59,7 @@ Most of these features are enabled by default. See the values.yaml file for how
## Caveats
- The [loki.source.kubernetes](https://grafana.com/docs/agent/latest/flow/reference/components/loki.source.kubernetes/) component of the Grafana Agent is used to scrape Kubernetes log files. This component is marked experimental at the moment.
- This has not been tested on OpenShift yet.
- The underlying Loki, Mimir and Tempo installations are deployed at the default size set by this Helm chart. This might need changing when monitoring bigger Loki, Mimir or Tempo installations.
- MinIO is currently used as storage, with limited retention, so this chart cannot yet be used for monitoring over longer periods.

View File

@@ -1,7 +1,7 @@
dependencies:
- name: loki
repository: https://grafana.github.io/helm-charts
version: 6.3.4
version: 5.47.2
- name: alloy
repository: https://grafana.github.io/helm-charts
version: 0.1.1
@@ -10,9 +10,9 @@ dependencies:
version: 5.3.0
- name: tempo-distributed
repository: https://grafana.github.io/helm-charts
version: 1.9.4
version: 1.9.2
- name: minio
repository: https://charts.min.io
version: 5.2.0
digest: sha256:33c7285f1b517a9de4c15a9cb5e32478e8fd07cba3601fa93f03f4925d792f04
generated: "2024-04-29T07:02:49.713859154Z"
version: 5.1.0
digest: sha256:b9521eea011cdf82856ee909f3f42f98722393678799094de2307e89c92bc045
generated: "2024-04-16T08:02:21.931525336Z"

View File

@@ -13,7 +13,7 @@ type: application
# This is the chart version. This version number should be incremented each time you make changes
# to the chart and its templates, including the app version.
# Versions are expected to follow Semantic Versioning (https://semver.org/)
version: 0.0.2
version: 0.0.1
# This is the version number of the application being deployed. This version number should be
# incremented each time you make changes to the application. Versions are not expected to
# follow Semantic Versioning. They should reflect the version the application is using.
@@ -22,7 +22,7 @@ appVersion: "0.0.1"
dependencies:
- name: loki
repository: https://grafana.github.io/helm-charts
version: 6.3.4
version: "5.47.2"
condition: local.logs.enabled
- name: alloy
repository: https://grafana.github.io/helm-charts
@@ -33,9 +33,9 @@ dependencies:
condition: local.metrics.enabled
- name: tempo-distributed
repository: https://grafana.github.io/helm-charts
version: 1.9.4
version: 1.9.2
condition: local.traces.enabled
- name: minio
repository: https://charts.min.io
version: 5.2.0
version: 5.1.0
condition: local.minio.enabled

Binary file not shown.

Binary file not shown.

View File

@@ -64,7 +64,7 @@
"span": 6,
"targets": [
{
"expr": "sum(loki_ingester_memory_chunks{cluster=\"$cluster\", job=~\"$namespace/(.*ingester.*|(loki|enterprise-logs)-write|loki-single-binary)\"})",
"expr": "sum(loki_ingester_memory_chunks{cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"})",
"format": "time_series",
"legendFormat": "series",
"legendLink": null
@@ -111,7 +111,7 @@
"span": 6,
"targets": [
{
"expr": "sum(loki_ingester_memory_chunks{cluster=\"$cluster\", job=~\"$namespace/(.*ingester.*|(loki|enterprise-logs)-write|loki-single-binary)\"}) / sum(loki_ingester_memory_streams{cluster=\"$cluster\", job=~\"$namespace/(.*ingester.*|(loki|enterprise-logs)-write|loki-single-binary)\"})",
"expr": "sum(loki_ingester_memory_chunks{cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}) / sum(loki_ingester_memory_streams{cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"})",
"format": "time_series",
"legendFormat": "chunks",
"legendLink": null
@@ -171,19 +171,19 @@
"span": 6,
"targets": [
{
"expr": "histogram_quantile(0.99, sum(rate(loki_ingester_chunk_utilization_bucket{cluster=\"$cluster\", job=~\"$namespace/(.*ingester.*|(loki|enterprise-logs)-write|loki-single-binary)\"}[$__rate_interval])) by (le)) * 1",
"expr": "histogram_quantile(0.99, sum(rate(loki_ingester_chunk_utilization_bucket{cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[$__rate_interval])) by (le)) * 1",
"format": "time_series",
"legendFormat": "99th Percentile",
"refId": "A"
},
{
"expr": "histogram_quantile(0.50, sum(rate(loki_ingester_chunk_utilization_bucket{cluster=\"$cluster\", job=~\"$namespace/(.*ingester.*|(loki|enterprise-logs)-write|loki-single-binary)\"}[$__rate_interval])) by (le)) * 1",
"expr": "histogram_quantile(0.50, sum(rate(loki_ingester_chunk_utilization_bucket{cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[$__rate_interval])) by (le)) * 1",
"format": "time_series",
"legendFormat": "50th Percentile",
"refId": "B"
},
{
"expr": "sum(rate(loki_ingester_chunk_utilization_sum{cluster=\"$cluster\", job=~\"$namespace/(.*ingester.*|(loki|enterprise-logs)-write|loki-single-binary)\"}[$__rate_interval])) * 1 / sum(rate(loki_ingester_chunk_utilization_count{cluster=\"$cluster\", job=~\"$namespace/(.*ingester.*|(loki|enterprise-logs)-write|loki-single-binary)\"}[$__rate_interval]))",
"expr": "sum(rate(loki_ingester_chunk_utilization_sum{cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[$__rate_interval])) * 1 / sum(rate(loki_ingester_chunk_utilization_count{cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[$__rate_interval]))",
"format": "time_series",
"legendFormat": "Average",
"refId": "C"
@@ -249,19 +249,19 @@
"span": 6,
"targets": [
{
"expr": "histogram_quantile(0.99, sum(rate(loki_ingester_chunk_age_seconds_bucket{cluster=\"$cluster\", job=~\"$namespace/(.*ingester.*|(loki|enterprise-logs)-write|loki-single-binary)\"}[$__rate_interval])) by (le)) * 1e3",
"expr": "histogram_quantile(0.99, sum(rate(loki_ingester_chunk_age_seconds_bucket{cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[$__rate_interval])) by (le)) * 1e3",
"format": "time_series",
"legendFormat": "99th Percentile",
"refId": "A"
},
{
"expr": "histogram_quantile(0.50, sum(rate(loki_ingester_chunk_age_seconds_bucket{cluster=\"$cluster\", job=~\"$namespace/(.*ingester.*|(loki|enterprise-logs)-write|loki-single-binary)\"}[$__rate_interval])) by (le)) * 1e3",
"expr": "histogram_quantile(0.50, sum(rate(loki_ingester_chunk_age_seconds_bucket{cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[$__rate_interval])) by (le)) * 1e3",
"format": "time_series",
"legendFormat": "50th Percentile",
"refId": "B"
},
{
"expr": "sum(rate(loki_ingester_chunk_age_seconds_sum{cluster=\"$cluster\", job=~\"$namespace/(.*ingester.*|(loki|enterprise-logs)-write|loki-single-binary)\"}[$__rate_interval])) * 1e3 / sum(rate(loki_ingester_chunk_age_seconds_count{cluster=\"$cluster\", job=~\"$namespace/(.*ingester.*|(loki|enterprise-logs)-write|loki-single-binary)\"}[$__rate_interval]))",
"expr": "sum(rate(loki_ingester_chunk_age_seconds_sum{cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[$__rate_interval])) * 1e3 / sum(rate(loki_ingester_chunk_age_seconds_count{cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[$__rate_interval]))",
"format": "time_series",
"legendFormat": "Average",
"refId": "C"
@@ -339,19 +339,19 @@
"span": 6,
"targets": [
{
"expr": "histogram_quantile(0.99, sum(rate(loki_ingester_chunk_entries_bucket{cluster=\"$cluster\", job=~\"$namespace/(.*ingester.*|(loki|enterprise-logs)-write|loki-single-binary)\"}[$__rate_interval])) by (le)) * 1",
"expr": "histogram_quantile(0.99, sum(rate(loki_ingester_chunk_entries_bucket{cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[$__rate_interval])) by (le)) * 1",
"format": "time_series",
"legendFormat": "99th Percentile",
"refId": "A"
},
{
"expr": "histogram_quantile(0.50, sum(rate(loki_ingester_chunk_entries_bucket{cluster=\"$cluster\", job=~\"$namespace/(.*ingester.*|(loki|enterprise-logs)-write|loki-single-binary)\"}[$__rate_interval])) by (le)) * 1",
"expr": "histogram_quantile(0.50, sum(rate(loki_ingester_chunk_entries_bucket{cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[$__rate_interval])) by (le)) * 1",
"format": "time_series",
"legendFormat": "50th Percentile",
"refId": "B"
},
{
"expr": "sum(rate(loki_ingester_chunk_entries_sum{cluster=\"$cluster\", job=~\"$namespace/(.*ingester.*|(loki|enterprise-logs)-write|loki-single-binary)\"}[$__rate_interval])) * 1 / sum(rate(loki_ingester_chunk_entries_count{cluster=\"$cluster\", job=~\"$namespace/(.*ingester.*|(loki|enterprise-logs)-write|loki-single-binary)\"}[$__rate_interval]))",
"expr": "sum(rate(loki_ingester_chunk_entries_sum{cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[$__rate_interval])) * 1 / sum(rate(loki_ingester_chunk_entries_count{cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[$__rate_interval]))",
"format": "time_series",
"legendFormat": "Average",
"refId": "C"
@@ -416,7 +416,7 @@
"span": 6,
"targets": [
{
"expr": "sum(rate(loki_chunk_store_index_entries_per_chunk_sum{cluster=\"$cluster\", job=~\"$namespace/(.*ingester.*|(loki|enterprise-logs)-write|loki-single-binary)\"}[$__rate_interval])) / sum(rate(loki_chunk_store_index_entries_per_chunk_count{cluster=\"$cluster\", job=~\"$namespace/(.*ingester.*|(loki|enterprise-logs)-write|loki-single-binary)\"}[$__rate_interval]))",
"expr": "sum(rate(loki_chunk_store_index_entries_per_chunk_sum{cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[5m])) / sum(rate(loki_chunk_store_index_entries_per_chunk_count{cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[5m]))",
"format": "time_series",
"legendFormat": "Index Entries",
"legendLink": null
@@ -475,7 +475,7 @@
"span": 6,
"targets": [
{
"expr": "loki_ingester_flush_queue_length{cluster=\"$cluster\", job=~\"$namespace/(.*ingester.*|(loki|enterprise-logs)-write|loki-single-binary)\"} or cortex_ingester_flush_queue_length{cluster=\"$cluster\", job=~\"$namespace/(.*ingester.*|(loki|enterprise-logs)-write|loki-single-binary)\"}",
"expr": "loki_ingester_flush_queue_length{cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"} or cortex_ingester_flush_queue_length{cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}",
"format": "time_series",
"legendFormat": "{{pod}}",
"legendLink": null
@@ -673,7 +673,7 @@
"stack": true,
"targets": [
{
"expr": "sum by (status) (\n label_replace(label_replace(rate(loki_ingester_chunk_age_seconds_count{cluster=\"$cluster\", job=~\"$namespace/(.*ingester.*|(loki|enterprise-logs)-write|loki-single-binary)\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n",
"expr": "sum by (status) (\n label_replace(label_replace(rate(loki_ingester_chunk_age_seconds_count{cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n",
"format": "time_series",
"legendFormat": "{{status}}",
"refId": "A"
@@ -732,7 +732,7 @@
"span": 6,
"targets": [
{
"expr": "sum(rate(loki_ingester_chunks_flushed_total{cluster=\"$cluster\", job=~\"$namespace/(.*ingester.*|(loki|enterprise-logs)-write|loki-single-binary)\"}[$__rate_interval]))",
"expr": "sum(rate(loki_ingester_chunks_flushed_total{cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[$__rate_interval]))",
"format": "time_series",
"legendFormat": "{{pod}}",
"legendLink": null
@@ -780,7 +780,7 @@
"stack": true,
"targets": [
{
"expr": "sum by (reason) (rate(loki_ingester_chunks_flushed_total{cluster=\"$cluster\", job=~\"$namespace/(.*ingester.*|(loki|enterprise-logs)-write|loki-single-binary)\"}[$__rate_interval])) / ignoring(reason) group_left sum(rate(loki_ingester_chunks_flushed_total{cluster=\"$cluster\", job=~\"$namespace/(.*ingester.*|(loki|enterprise-logs)-write|loki-single-binary)\"}[$__rate_interval]))",
"expr": "sum by (reason) (rate(loki_ingester_chunks_flushed_total{cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[$__rate_interval])) / ignoring(reason) group_left sum(rate(loki_ingester_chunks_flushed_total{cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[$__rate_interval]))",
"format": "time_series",
"legendFormat": "{{reason}}",
"legendLink": null
@@ -843,7 +843,7 @@
"span": 12,
"targets": [
{
"expr": "sum by (le) (rate(loki_ingester_chunk_utilization_bucket{cluster=\"$cluster\", job=~\"$namespace/(.*ingester.*|(loki|enterprise-logs)-write|loki-single-binary)\"}[$__rate_interval]))",
"expr": "sum by (le) (rate(loki_ingester_chunk_utilization_bucket{cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[$__rate_interval]))",
"format": "heatmap",
"intervalFactor": 2,
"legendFormat": "{{le}}",
@@ -905,7 +905,7 @@
"span": 12,
"targets": [
{
"expr": "sum(rate(loki_ingester_chunk_size_bytes_bucket{cluster=\"$cluster\", job=~\"$namespace/(.*ingester.*|(loki|enterprise-logs)-write|loki-single-binary)\"}[$__rate_interval])) by (le)",
"expr": "sum(rate(loki_ingester_chunk_size_bytes_bucket{cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[$__rate_interval])) by (le)",
"format": "heatmap",
"intervalFactor": 2,
"legendFormat": "{{le}}",
@@ -981,19 +981,19 @@
"span": 12,
"targets": [
{
"expr": "histogram_quantile(0.99, sum(rate(loki_ingester_chunk_size_bytes_bucket{cluster=\"$cluster\", job=~\"$namespace/(.*ingester.*|(loki|enterprise-logs)-write|loki-single-binary)\"}[$__rate_interval])) by (le))",
"expr": "histogram_quantile(0.99, sum(rate(loki_ingester_chunk_size_bytes_bucket{cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[1m])) by (le))",
"format": "time_series",
"legendFormat": "p99",
"legendLink": null
},
{
"expr": "histogram_quantile(0.90, sum(rate(loki_ingester_chunk_size_bytes_bucket{cluster=\"$cluster\", job=~\"$namespace/(.*ingester.*|(loki|enterprise-logs)-write|loki-single-binary)\"}[$__rate_interval])) by (le))",
"expr": "histogram_quantile(0.90, sum(rate(loki_ingester_chunk_size_bytes_bucket{cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[1m])) by (le))",
"format": "time_series",
"legendFormat": "p90",
"legendLink": null
},
{
"expr": "histogram_quantile(0.50, sum(rate(loki_ingester_chunk_size_bytes_bucket{cluster=\"$cluster\", job=~\"$namespace/(.*ingester.*|(loki|enterprise-logs)-write|loki-single-binary)\"}[$__rate_interval])) by (le))",
"expr": "histogram_quantile(0.50, sum(rate(loki_ingester_chunk_size_bytes_bucket{cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[1m])) by (le))",
"format": "time_series",
"legendFormat": "p50",
"legendLink": null
@@ -1052,19 +1052,19 @@
"span": 12,
"targets": [
{
"expr": "histogram_quantile(0.5, sum(rate(loki_ingester_chunk_bounds_hours_bucket{cluster=\"$cluster\", job=~\"$namespace/(.*ingester.*|(loki|enterprise-logs)-write|loki-single-binary)\"}[$__rate_interval])) by (le))",
"expr": "histogram_quantile(0.5, sum(rate(loki_ingester_chunk_bounds_hours_bucket{cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[5m])) by (le))",
"format": "time_series",
"legendFormat": "p50",
"legendLink": null
},
{
"expr": "histogram_quantile(0.99, sum(rate(loki_ingester_chunk_bounds_hours_bucket{cluster=\"$cluster\", job=~\"$namespace/(.*ingester.*|(loki|enterprise-logs)-write|loki-single-binary)\"}[$__rate_interval])) by (le))",
"expr": "histogram_quantile(0.99, sum(rate(loki_ingester_chunk_bounds_hours_bucket{cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[5m])) by (le))",
"format": "time_series",
"legendFormat": "p99",
"legendLink": null
},
{
"expr": "sum(rate(loki_ingester_chunk_bounds_hours_sum{cluster=\"$cluster\", job=~\"$namespace/(.*ingester.*|(loki|enterprise-logs)-write|loki-single-binary)\"}[$__rate_interval])) / sum(rate(loki_ingester_chunk_bounds_hours_count{cluster=\"$cluster\", job=~\"$namespace/(.*ingester.*|(loki|enterprise-logs)-write|loki-single-binary)\"}[$__rate_interval]))",
"expr": "sum(rate(loki_ingester_chunk_bounds_hours_sum{cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[5m])) / sum(rate(loki_ingester_chunk_bounds_hours_count{cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[5m]))",
"format": "time_series",
"legendFormat": "avg",
"legendLink": null

View File

@@ -579,7 +579,7 @@
"span": 6,
"targets": [
{
"expr": "sum(rate(loki_compactor_deleted_lines{cluster=~\"$cluster\", namespace=~\"$namespace\", pod=~\"(.*/compactor|(loki|enterprise-logs)-backend.*|loki-single-binary)\"}[$__rate_interval])) by (user)",
"expr": "sum(rate(loki_compactor_deleted_lines{cluster=~\"$cluster\",job=~\"$namespace/(loki|enterprise-logs)-read\"}[$__rate_interval])) by (user)",
"format": "time_series",
"legendFormat": "{{user}}",
"legendLink": null
@@ -606,7 +606,7 @@
"span": 6,
"targets": [
{
"expr": "{cluster=~\"$cluster\", namespace=~\"$namespace\", pod=~\"(.*/compactor|(loki|enterprise-logs)-backend.*|loki-single-binary)\"} |~ \"Started processing delete request|delete request for user marked as processed\" | logfmt | line_format \"{{.ts}} user={{.user}} delete_request_id={{.delete_request_id}} msg={{.msg}}\" ",
"expr": "{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"compactor\"} |~ \"Started processing delete request|delete request for user marked as processed\" | logfmt | line_format \"{{.ts}} user={{.user}} delete_request_id={{.delete_request_id}} msg={{.msg}}\" ",
"refId": "A"
}
],
@@ -619,7 +619,7 @@
"span": 6,
"targets": [
{
"expr": "{cluster=~\"$cluster\", namespace=~\"$namespace\", pod=~\"(.*/compactor|(loki|enterprise-logs)-backend.*|loki-single-binary)\"} |~ \"delete request for user added\" | logfmt | line_format \"{{.ts}} user={{.user}} query='{{.query}}'\"",
"expr": "{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"compactor\"} |~ \"delete request for user added\" | logfmt | line_format \"{{.ts}} user={{.user}} query='{{.query}}'\"",
"refId": "A"
}
],
@@ -701,16 +701,6 @@
"tagsQuery": "",
"type": "query",
"useTags": false
},
{
"hide": 0,
"label": null,
"name": "loki_datasource",
"options": [ ],
"query": "loki",
"refresh": 1,
"regex": "",
"type": "datasource"
}
]
},

View File

@@ -114,11 +114,6 @@
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fieldConfig": {
"defaults": {
"unit": "s"
}
},
"fill": 1,
"fillGradient": 0,
"gridPos": {
@@ -241,7 +236,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(rate(container_cpu_usage_seconds_total{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$deployment.*\", pod=~\"$pod\", container=~\"$container\"}[$__rate_interval]))",
"expr": "sum(rate(container_cpu_usage_seconds_total{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$deployment.*\", pod=~\"$pod\", container=~\"$container\"}[5m]))",
"refId": "A"
}
],
@@ -292,11 +287,6 @@
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fieldConfig": {
"defaults": {
"unit": "bytes"
}
},
"fill": 1,
"fillGradient": 0,
"gridPos": {
@@ -383,11 +373,6 @@
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fieldConfig": {
"defaults": {
"unit": "binBps"
}
},
"fill": 1,
"fillGradient": 0,
"gridPos": {
@@ -423,7 +408,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(rate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$deployment.*\", pod=~\"$pod\"}[$__rate_interval]))",
"expr": "sum(rate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$deployment.*\", pod=~\"$pod\"}[5m]))",
"refId": "A"
}
],
@@ -474,11 +459,6 @@
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fieldConfig": {
"defaults": {
"unit": "binBps"
}
},
"fill": 1,
"fillGradient": 0,
"gridPos": {
@@ -514,7 +494,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(rate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$deployment.*\", pod=~\"$pod\"}[$__rate_interval]))",
"expr": "sum(rate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$deployment.*\", pod=~\"$pod\"}[5m]))",
"refId": "A"
}
],
@@ -652,11 +632,6 @@
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fieldConfig": {
"defaults": {
"unit": "ops"
}
},
"fill": 1,
"fillGradient": 0,
"gridPos": {
@@ -692,7 +667,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(rate(promtail_custom_bad_words_total{cluster=\"$cluster\", exported_namespace=\"$namespace\", exported_pod=~\"$deployment.*\", exported_pod=~\"$pod\", container=~\"$container\"}[$__rate_interval])) by (level)",
"expr": "sum(rate(promtail_custom_bad_words_total{cluster=\"$cluster\", exported_namespace=\"$namespace\", exported_pod=~\"$deployment.*\", exported_pod=~\"$pod\", container=~\"$container\"}[5m])) by (level)",
"legendFormat": "{{level}}",
"refId": "A"
}
@@ -744,11 +719,6 @@
"dashLength": 10,
"dashes": false,
"datasource": "$loki_datasource",
"fieldConfig": {
"defaults": {
"unit": "ops"
}
},
"fill": 1,
"fillGradient": 0,
"gridPos": {
@@ -801,7 +771,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(rate({cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$deployment.*\", pod=~\"$pod\", container=~\"$container\" } |logfmt| level=\"$level\" |= \"$filter\" | __error__=\"\" [$__interval])) by (level)",
"expr": "sum(rate({cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$deployment.*\", pod=~\"$pod\", container=~\"$container\" } |logfmt| level=\"$level\" |= \"$filter\" [5m])) by (level)",
"intervalFactor": 3,
"legendFormat": "{{level}}",
"refId": "A"

View File

@@ -0,0 +1,723 @@
{
"annotations": {
"list": [ ]
},
"editable": true,
"fiscalYearStartMonth": 0,
"gnetId": null,
"graphTooltip": 0,
"hideControls": false,
"iteration": 1635347545534,
"links": [
{
"asDropdown": true,
"icon": "external link",
"includeVars": true,
"keepTime": true,
"tags": [
"loki"
],
"targetBlank": false,
"title": "Loki Dashboards",
"type": "dashboards"
}
],
"liveNow": false,
"panels": [
{
"datasource": "${datasource}",
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"mappings": [ ],
"noValue": "0",
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 1
}
]
}
},
"overrides": [ ]
},
"gridPos": {
"h": 10,
"w": 2,
"x": 0,
"y": 0
},
"id": 2,
"options": {
"colorMode": "value",
"graphMode": "area",
"justifyMode": "auto",
"orientation": "auto",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"textMode": "auto"
},
"pluginVersion": "8.3.0-38205pre",
"targets": [
{
"datasource": "${datasource}",
"exemplar": false,
"expr": "sum(loki_ruler_wal_appender_ready) by (pod, tenant) == 0",
"instant": true,
"interval": "",
"legendFormat": "",
"refId": "A"
}
],
"title": "Appenders Not Ready",
"type": "stat"
},
{
"datasource": "${datasource}",
"description": "",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [ ],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": [ ]
},
"gridPos": {
"h": 10,
"w": 11,
"x": 2,
"y": 0
},
"id": 4,
"options": {
"legend": {
"calcs": [ ],
"displayMode": "list",
"placement": "bottom"
},
"tooltip": {
"mode": "single"
}
},
"targets": [
{
"datasource": "${datasource}",
"exemplar": true,
"expr": "sum(rate(loki_ruler_wal_samples_appended_total{tenant=~\"${tenant}\"}[$__rate_interval])) by (tenant) > 0",
"interval": "",
"legendFormat": "{{tenant}}",
"refId": "A"
}
],
"title": "Samples Appended to WAL per Second",
"type": "timeseries"
},
{
"datasource": "${datasource}",
"description": "Series are unique combinations of labels",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [ ],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": [ ]
},
"gridPos": {
"h": 10,
"w": 11,
"x": 13,
"y": 0
},
"id": 5,
"options": {
"legend": {
"calcs": [ ],
"displayMode": "list",
"placement": "bottom"
},
"tooltip": {
"mode": "single"
}
},
"targets": [
{
"datasource": "${datasource}",
"exemplar": true,
"expr": "sum(rate(loki_ruler_wal_storage_created_series_total{tenant=~\"${tenant}\"}[$__rate_interval])) by (tenant) > 0",
"interval": "",
"legendFormat": "{{tenant}}",
"refId": "A"
}
],
"title": "Series Created per Second",
"type": "timeseries"
},
{
"datasource": "${datasource}",
"description": "Difference between highest timestamp appended to WAL and highest timestamp successfully written to remote storage",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [ ],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": [ ]
},
"gridPos": {
"h": 10,
"w": 12,
"x": 0,
"y": 10
},
"id": 6,
"options": {
"legend": {
"calcs": [ ],
"displayMode": "list",
"placement": "bottom"
},
"tooltip": {
"mode": "single"
}
},
"targets": [
{
"datasource": "${datasource}",
"exemplar": true,
"expr": "loki_ruler_wal_prometheus_remote_storage_highest_timestamp_in_seconds{tenant=~\"${tenant}\"}\n- on (tenant)\n (\n loki_ruler_wal_prometheus_remote_storage_queue_highest_sent_timestamp_seconds{tenant=~\"${tenant}\"}\n or vector(0)\n )",
"interval": "",
"legendFormat": "{{tenant}}",
"refId": "A"
}
],
"title": "Write Behind",
"type": "timeseries"
},
{
"datasource": "${datasource}",
"description": "",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [ ],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": [ ]
},
"gridPos": {
"h": 10,
"w": 12,
"x": 12,
"y": 10
},
"id": 7,
"options": {
"legend": {
"calcs": [ ],
"displayMode": "list",
"placement": "bottom"
},
"tooltip": {
"mode": "single"
}
},
"targets": [
{
"datasource": "${datasource}",
"exemplar": true,
"expr": "sum(rate(loki_ruler_wal_prometheus_remote_storage_samples_total{tenant=~\"${tenant}\"}[$__rate_interval])) by (tenant) > 0",
"interval": "",
"legendFormat": "{{tenant}}",
"refId": "A"
}
],
"title": "Samples Sent per Second",
"type": "timeseries"
},
{
"datasource": "${datasource}",
"description": "\n",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [ ],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "bytes"
},
"overrides": [ ]
},
"gridPos": {
"h": 10,
"w": 12,
"x": 0,
"y": 20
},
"id": 8,
"options": {
"legend": {
"calcs": [ ],
"displayMode": "list",
"placement": "bottom"
},
"tooltip": {
"mode": "single"
}
},
"targets": [
{
"datasource": "${datasource}",
"exemplar": true,
"expr": "sum by (tenant) (loki_ruler_wal_disk_size{tenant=~\"${tenant}\"})",
"interval": "",
"legendFormat": "{{tenant}}",
"refId": "A"
}
],
"title": "WAL Disk Size",
"type": "timeseries"
},
{
"datasource": "${datasource}",
"description": "Some number of pending samples is expected, but if remote-write is failing this value will remain high",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [ ],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": [ ]
},
"gridPos": {
"h": 10,
"w": 12,
"x": 12,
"y": 20
},
"id": 9,
"options": {
"legend": {
"calcs": [ ],
"displayMode": "list",
"placement": "bottom"
},
"tooltip": {
"mode": "single"
}
},
"targets": [
{
"datasource": "${datasource}",
"exemplar": true,
"expr": "max(loki_ruler_wal_prometheus_remote_storage_samples_pending{tenant=~\"${tenant}\"}) by (tenant,pod) > 0",
"interval": "",
"legendFormat": "{{tenant}}",
"refId": "A"
}
],
"title": "Pending Samples",
"type": "timeseries"
}
],
"refresh": "10s",
"rows": [ ],
"schemaVersion": 14,
"style": "dark",
"tags": [
"loki"
],
"templating": {
"list": [
{
"current": {
"text": "default",
"value": "default"
},
"hide": 0,
"label": "Data source",
"name": "datasource",
"options": [ ],
"query": "prometheus",
"refresh": 1,
"regex": "",
"type": "datasource"
},
{
"allValue": null,
"current": {
"text": "prod",
"value": "prod"
},
"datasource": "$datasource",
"hide": 0,
"includeAll": false,
"label": "cluster",
"multi": false,
"name": "cluster",
"options": [ ],
"query": "label_values(loki_build_info, cluster)",
"refresh": 1,
"regex": "",
"sort": 2,
"tagValuesQuery": "",
"tags": [ ],
"tagsQuery": "",
"type": "query",
"useTags": false
},
{
"allValue": null,
"current": {
"text": "prod",
"value": "prod"
},
"datasource": "$datasource",
"hide": 0,
"includeAll": false,
"label": "namespace",
"multi": false,
"name": "namespace",
"options": [ ],
"query": "label_values(loki_build_info{cluster=~\"$cluster\"}, namespace)",
"refresh": 1,
"regex": "",
"sort": 2,
"tagValuesQuery": "",
"tags": [ ],
"tagsQuery": "",
"type": "query",
"useTags": false
},
{
"hide": 0,
"label": null,
"name": "loki_datasource",
"options": [ ],
"query": "loki",
"refresh": 1,
"regex": "",
"type": "datasource"
},
{
"allValue": ".+",
"current": { },
"datasource": "$datasource",
"hide": 0,
"includeAll": true,
"label": null,
"multi": false,
"name": "tenant",
"options": [ ],
"query": "query_result(sum by (id) (grafanacloud_logs_instance_info) and sum(label_replace(loki_tenant:active_streams{cluster=\"$cluster\",namespace=\"$namespace\"},\"id\",\"$1\",\"tenant\",\"(.*)\")) by(id))",
"refresh": 0,
"regex": "/\"([^\"]+)\"/",
"sort": 1,
"tagValuesQuery": "",
"tags": [ ],
"tagsQuery": "",
"type": "query",
"useTags": false
}
]
},
"time": {
"from": "now-1h",
"to": "now"
},
"timepicker": {
"refresh_intervals": [
"5s",
"10s",
"30s",
"1m",
"5m",
"15m",
"30m",
"1h",
"2h",
"1d"
],
"time_options": [
"5m",
"15m",
"1h",
"6h",
"12h",
"24h",
"2d",
"7d",
"30d"
]
},
"timezone": "utc",
"title": "Loki / Recording Rules",
"uid": "recording-rules",
"version": 0,
"weekStart": ""
}

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@@ -103,19 +103,19 @@
"span": 4,
"targets": [
{
"expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", pod=~\"(.*compactor.*|(loki|enterprise-logs)-backend.*|loki-single-binary)\"}[$__rate_interval]))",
"expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-read.*\"}[$__rate_interval]))",
"format": "time_series",
"legendFormat": "{{pod}}",
"legendLink": null
},
{
"expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", pod=~\"(.*compactor.*|(loki|enterprise-logs)-backend.*|loki-single-binary)\", resource=\"cpu\"} > 0)",
"expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-read.*\", resource=\"cpu\"} > 0)",
"format": "time_series",
"legendFormat": "request",
"legendLink": null
},
{
"expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", pod=~\"(.*compactor.*|(loki|enterprise-logs)-backend.*|loki-single-binary)\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", pod=~\"(.*compactor.*|(loki|enterprise-logs)-backend.*|loki-single-binary)\"})",
"expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-read.*\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-read.*\"})",
"format": "time_series",
"legendFormat": "limit",
"legendLink": null
@@ -204,19 +204,19 @@
"span": 4,
"targets": [
{
"expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", pod=~\"(.*compactor.*|(loki|enterprise-logs)-backend.*|loki-single-binary)\"})",
"expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-read.*\"})",
"format": "time_series",
"legendFormat": "{{pod}}",
"legendLink": null
},
{
"expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", pod=~\"(.*compactor.*|(loki|enterprise-logs)-backend.*|loki-single-binary)\", resource=\"memory\"} > 0)",
"expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-read.*\", resource=\"memory\"} > 0)",
"format": "time_series",
"legendFormat": "request",
"legendLink": null
},
{
"expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", pod=~\"(.*compactor.*|(loki|enterprise-logs)-backend.*|loki-single-binary)\"} > 0)",
"expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-read.*\"} > 0)",
"format": "time_series",
"legendFormat": "limit",
"legendLink": null
@@ -266,7 +266,7 @@
"span": 4,
"targets": [
{
"expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/\"(.*compactor|(loki|enterprise-logs)-backend.*|loki-single-binary)\"\"})",
"expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/(loki|enterprise-logs)-read\"})",
"format": "time_series",
"legendFormat": "{{pod}}",
"legendLink": null
@@ -1367,7 +1367,7 @@
"span": 12,
"targets": [
{
"expr": "{cluster=~\"$cluster\", job=~\"($namespace)/(.*compactor|(loki|enterprise-logs)-backend.*|loki-single-binary)\"}",
"expr": "{cluster=~\"$cluster\", job=~\"($namespace)/(loki|enterprise-logs)-read\"}",
"refId": "A"
}
],

View File

@@ -22,270 +22,6 @@
],
"refresh": "10s",
"rows": [
{
"collapse": false,
"height": "250px",
"panels": [
{
"datasource": "$datasource",
"fieldConfig": {
"defaults": {
"custom": {
"drawStyle": "line",
"fillOpacity": 10,
"lineWidth": 1,
"pointSize": 5,
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
}
},
"thresholds": {
"mode": "absolute",
"steps": [ ]
},
"unit": "short"
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "request"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "#FFC000",
"mode": "fixed"
}
},
{
"id": "custom.fillOpacity",
"value": 0
}
]
},
{
"matcher": {
"id": "byName",
"options": "limit"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "#E02F44",
"mode": "fixed"
}
},
{
"id": "custom.fillOpacity",
"value": 0
}
]
}
]
},
"id": 1,
"links": [ ],
"options": {
"legend": {
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"span": 4,
"targets": [
{
"expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"distributor\"}[$__rate_interval]))",
"format": "time_series",
"legendFormat": "{{pod}}",
"legendLink": null
},
{
"expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"distributor\", resource=\"cpu\"} > 0)",
"format": "time_series",
"legendFormat": "request",
"legendLink": null
},
{
"expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"distributor\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"distributor\"})",
"format": "time_series",
"legendFormat": "limit",
"legendLink": null
}
],
"title": "CPU",
"tooltip": {
"sort": 2
},
"type": "timeseries"
},
{
"datasource": "$datasource",
"fieldConfig": {
"defaults": {
"custom": {
"drawStyle": "line",
"fillOpacity": 10,
"lineWidth": 1,
"pointSize": 5,
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
}
},
"thresholds": {
"mode": "absolute",
"steps": [ ]
},
"unit": "bytes"
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "request"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "#FFC000",
"mode": "fixed"
}
},
{
"id": "custom.fillOpacity",
"value": 0
}
]
},
{
"matcher": {
"id": "byName",
"options": "limit"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "#E02F44",
"mode": "fixed"
}
},
{
"id": "custom.fillOpacity",
"value": 0
}
]
}
]
},
"id": 2,
"links": [ ],
"options": {
"legend": {
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"span": 4,
"targets": [
{
"expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"distributor\"})",
"format": "time_series",
"legendFormat": "{{pod}}",
"legendLink": null
},
{
"expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"distributor\", resource=\"memory\"} > 0)",
"format": "time_series",
"legendFormat": "request",
"legendLink": null
},
{
"expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"distributor\"} > 0)",
"format": "time_series",
"legendFormat": "limit",
"legendLink": null
}
],
"title": "Memory (workingset)",
"tooltip": {
"sort": 2
},
"type": "timeseries"
},
{
"datasource": "$datasource",
"fieldConfig": {
"defaults": {
"custom": {
"drawStyle": "line",
"fillOpacity": 10,
"lineWidth": 1,
"pointSize": 5,
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
}
},
"thresholds": {
"mode": "absolute",
"steps": [ ]
},
"unit": "bytes"
},
"overrides": [ ]
},
"id": 3,
"links": [ ],
"options": {
"legend": {
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"span": 4,
"targets": [
{
"expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/.*distributor\"})",
"format": "time_series",
"legendFormat": "{{pod}}",
"legendLink": null
}
],
"title": "Memory (go heap inuse)",
"tooltip": {
"sort": 2
},
"type": "timeseries"
}
],
"repeat": null,
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
"title": "Distributor",
"titleSize": "h6"
},
{
"collapse": false,
"collapsed": false,
@@ -315,7 +51,7 @@
"overrides": [ ]
},
"gridPos": { },
"id": 4,
"id": 1,
"links": [ ],
"options": {
"legend": {
@@ -328,7 +64,7 @@
},
"targets": [
{
"expr": "sum by(pod) (loki_ingester_memory_streams{cluster=~\"$cluster\", job=~\"($namespace)/(.*ingester.*|(loki|enterprise-logs)-write|loki-single-binary)\"})",
"expr": "sum by(pod) (loki_ingester_memory_streams{cluster=~\"$cluster\", job=~\"($namespace)/(loki|enterprise-logs)-write\"})",
"format": "time_series",
"legendFormat": "{{pod}}",
"legendLink": null
@@ -404,7 +140,7 @@
]
},
"gridPos": { },
"id": 5,
"id": 2,
"links": [ ],
"options": {
"legend": {
@@ -417,19 +153,19 @@
},
"targets": [
{
"expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"loki|ingester\", pod=~\"(.*ingester.*|(loki|enterprise-logs)-write.*|loki-single-binary)\"}[$__rate_interval]))",
"expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-write.*\"}[$__rate_interval]))",
"format": "time_series",
"legendFormat": "{{pod}}",
"legendLink": null
},
{
"expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"loki|ingester\", pod=~\"(.*ingester.*|(loki|enterprise-logs)-write.*|loki-single-binary)\", resource=\"cpu\"} > 0)",
"expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-write.*\", resource=\"cpu\"} > 0)",
"format": "time_series",
"legendFormat": "request",
"legendLink": null
},
{
"expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"loki|ingester\", pod=~\"(.*ingester.*|(loki|enterprise-logs)-write.*|loki-single-binary)\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"loki|ingester\", pod=~\"(.*ingester.*|(loki|enterprise-logs)-write.*|loki-single-binary)\"})",
"expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-write.*\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-write.*\"})",
"format": "time_series",
"legendFormat": "limit",
"legendLink": null
@@ -505,7 +241,7 @@
]
},
"gridPos": { },
"id": 6,
"id": 3,
"links": [ ],
"options": {
"legend": {
@@ -518,19 +254,19 @@
},
"targets": [
{
"expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"loki|ingester\", pod=~\"(.*ingester.*|(loki|enterprise-logs)-write.*|loki-single-binary)\"})",
"expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-write.*\"})",
"format": "time_series",
"legendFormat": "{{pod}}",
"legendLink": null
},
{
"expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"loki|ingester\", pod=~\"(.*ingester.*|(loki|enterprise-logs)-write.*|loki-single-binary)\", resource=\"memory\"} > 0)",
"expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-write.*\", resource=\"memory\"} > 0)",
"format": "time_series",
"legendFormat": "request",
"legendLink": null
},
{
"expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"loki|ingester\", pod=~\"(.*ingester.*|(loki|enterprise-logs)-write.*|loki-single-binary)\"} > 0)",
"expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-write.*\"} > 0)",
"format": "time_series",
"legendFormat": "limit",
"legendLink": null
@@ -567,7 +303,7 @@
"overrides": [ ]
},
"gridPos": { },
"id": 7,
"id": 4,
"links": [ ],
"options": {
"legend": {
@@ -580,7 +316,7 @@
},
"targets": [
{
"expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/(.*ingester.*|(loki|enterprise-logs)-write|loki-single-binary)\"})",
"expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/(loki|enterprise-logs)-write\"})",
"format": "time_series",
"legendFormat": "{{pod}}",
"legendLink": null
@@ -617,7 +353,7 @@
"overrides": [ ]
},
"gridPos": { },
"id": 8,
"id": 5,
"links": [ ],
"options": {
"legend": {
@@ -630,7 +366,7 @@
},
"targets": [
{
"expr": "sum by(instance, pod, device) (rate(node_disk_written_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) (container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"loki|ingester\", pod=~\"(.*ingester.*|(loki|enterprise-logs)-write.*|loki-single-binary)\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 0)\n",
"expr": "sum by(instance, pod, device) (rate(node_disk_written_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) (container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-write.*\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 0)\n",
"format": "time_series",
"legendFormat": "{{pod}} - {{device}}",
"legendLink": null
@@ -664,7 +400,7 @@
"overrides": [ ]
},
"gridPos": { },
"id": 9,
"id": 6,
"links": [ ],
"options": {
"legend": {
@@ -677,7 +413,7 @@
},
"targets": [
{
"expr": "sum by(instance, pod, device) (rate(node_disk_read_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) (container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"loki|ingester\", pod=~\"(.*ingester.*|(loki|enterprise-logs)-write.*|loki-single-binary)\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 0)\n",
"expr": "sum by(instance, pod, device) (rate(node_disk_read_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) (container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-write.*\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 0)\n",
"format": "time_series",
"legendFormat": "{{pod}} - {{device}}",
"legendLink": null
@@ -711,7 +447,7 @@
"overrides": [ ]
},
"gridPos": { },
"id": 10,
"id": 7,
"links": [ ],
"options": {
"legend": {
@@ -724,7 +460,7 @@
},
"targets": [
{
"expr": "max by(persistentvolumeclaim) (kubelet_volume_stats_used_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"} / kubelet_volume_stats_capacity_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"}) and count by(persistentvolumeclaim) (kube_persistentvolumeclaim_labels{cluster=~\"$cluster\", namespace=~\"$namespace\",label_name=~\"(.*ingester.*|(loki|enterprise-logs)-write|loki-single-binary).*\"})",
"expr": "max by(persistentvolumeclaim) (kubelet_volume_stats_used_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"} / kubelet_volume_stats_capacity_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"}) and count by(persistentvolumeclaim) (kube_persistentvolumeclaim_labels{cluster=~\"$cluster\", namespace=~\"$namespace\",label_name=~\"(loki|enterprise-logs)-write.*\"})",
"format": "time_series",
"legendFormat": "{{persistentvolumeclaim}}",
"legendLink": null
@@ -738,7 +474,7 @@
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
"title": "Ingester",
"title": "Write path",
"titleSize": "h6",
"type": "row"
}

View File

@@ -215,7 +215,7 @@
"stack": true,
"targets": [
{
"expr": "sum by (status) (\n label_replace(label_replace(rate(loki_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/(.*distributor|(loki|enterprise-logs)-write|loki-single-binary)\", route=~\"api_prom_push|loki_api_v1_push|/httpgrpc.HTTP/Handle\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n",
"expr": "sum by (status) (\n label_replace(label_replace(rate(loki_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/(loki|enterprise-logs)-write\", route=~\"api_prom_push|loki_api_v1_push|/httpgrpc.HTTP/Handle\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n",
"format": "time_series",
"legendFormat": "{{status}}",
"refId": "A"
@@ -263,7 +263,7 @@
"span": 6,
"targets": [
{
"expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/(.*distributor|(loki|enterprise-logs)-write|loki-single-binary)\", route=~\"api_prom_push|loki_api_v1_push|/httpgrpc.HTTP/Handle\"})) * 1e3",
"expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/(loki|enterprise-logs)-write\", route=~\"api_prom_push|loki_api_v1_push|/httpgrpc.HTTP/Handle\"})) * 1e3",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "99th Percentile",
@@ -271,7 +271,7 @@
"step": 10
},
{
"expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/(.*distributor|(loki|enterprise-logs)-write|loki-single-binary)\", route=~\"api_prom_push|loki_api_v1_push|/httpgrpc.HTTP/Handle\"})) * 1e3",
"expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/(loki|enterprise-logs)-write\", route=~\"api_prom_push|loki_api_v1_push|/httpgrpc.HTTP/Handle\"})) * 1e3",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "50th Percentile",
@@ -279,7 +279,7 @@
"step": 10
},
{
"expr": "1e3 * sum(cluster_job_route:loki_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/(.*distributor|(loki|enterprise-logs)-write|loki-single-binary)\", route=~\"api_prom_push|loki_api_v1_push|/httpgrpc.HTTP/Handle\"}) / sum(cluster_job_route:loki_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/(.*distributor|(loki|enterprise-logs)-write|loki-single-binary)\", route=~\"api_prom_push|loki_api_v1_push|/httpgrpc.HTTP/Handle\"})",
"expr": "1e3 * sum(cluster_job_route:loki_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/(loki|enterprise-logs)-write\", route=~\"api_prom_push|loki_api_v1_push|/httpgrpc.HTTP/Handle\"}) / sum(cluster_job_route:loki_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/(loki|enterprise-logs)-write\", route=~\"api_prom_push|loki_api_v1_push|/httpgrpc.HTTP/Handle\"})",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "Average",
@@ -313,7 +313,7 @@
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
"title": "Distributor",
"title": "Write Path",
"titleSize": "h6"
},
{
@@ -358,7 +358,7 @@
"span": 6,
"targets": [
{
"expr": "sum (rate(loki_distributor_structured_metadata_bytes_received_total{cluster=~\"$cluster\",job=~\"($namespace)/(.*distributor|(loki|enterprise-logs)-write|loki-single-binary)\",}[$__rate_interval])) / sum(rate(loki_distributor_bytes_received_total{cluster=~\"$cluster\",job=~\"($namespace)/(.*distributor|(loki|enterprise-logs)-write|loki-single-binary)\",}[$__rate_interval]))",
"expr": "sum (rate(loki_distributor_structured_metadata_bytes_received_total{cluster=~\"$cluster\",job=~\"($namespace)/(loki|enterprise-logs)-write\",}[$__rate_interval])) / sum(rate(loki_distributor_bytes_received_total{cluster=~\"$cluster\",job=~\"($namespace)/(loki|enterprise-logs)-write\",}[$__rate_interval]))",
"format": "time_series",
"legendFormat": "bytes",
"legendLink": null
@@ -406,7 +406,7 @@
"stack": true,
"targets": [
{
"expr": "sum by (tenant) (rate(loki_distributor_structured_metadata_bytes_received_total{cluster=~\"$cluster\",job=~\"($namespace)/(.*distributor|(loki|enterprise-logs)-write|loki-single-binary)\",}[$__rate_interval])) / ignoring(tenant) group_left sum(rate(loki_distributor_structured_metadata_bytes_received_total{cluster=~\"$cluster\",job=~\"($namespace)/(.*distributor|(loki|enterprise-logs)-write|loki-single-binary)\",}[$__rate_interval]))",
"expr": "sum by (tenant) (rate(loki_distributor_structured_metadata_bytes_received_total{cluster=~\"$cluster\",job=~\"($namespace)/(loki|enterprise-logs)-write\",}[$__rate_interval])) / ignoring(tenant) group_left sum(rate(loki_distributor_structured_metadata_bytes_received_total{cluster=~\"$cluster\",job=~\"($namespace)/(loki|enterprise-logs)-write\",}[$__rate_interval]))",
"format": "time_series",
"legendFormat": "{{tenant}}",
"legendLink": null
@@ -438,7 +438,7 @@
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
"title": "Distributor - Structured Metadata",
"title": "Write Path",
"titleSize": "h6"
},
{
@@ -634,7 +634,7 @@
"stack": true,
"targets": [
{
"expr": "sum by (status) (\n label_replace(label_replace(rate(loki_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/(.*ingester-zone.*|(loki|enterprise-logs)-write|loki-single-binary)\", route=\"/logproto.Pusher/Push\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n",
"expr": "sum by (status) (\n label_replace(label_replace(rate(loki_boltdb_shipper_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/(loki|enterprise-logs)-write\", operation=\"WRITE\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n",
"format": "time_series",
"legendFormat": "{{status}}",
"refId": "A"
@@ -682,895 +682,19 @@
"span": 6,
"targets": [
{
"expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/(.*ingester-zone.*|(loki|enterprise-logs)-write|loki-single-binary)\", route=\"/logproto.Pusher/Push\"})) * 1e3",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "99th Percentile",
"refId": "A",
"step": 10
},
{
"expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/(.*ingester-zone.*|(loki|enterprise-logs)-write|loki-single-binary)\", route=\"/logproto.Pusher/Push\"})) * 1e3",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "50th Percentile",
"refId": "B",
"step": 10
},
{
"expr": "1e3 * sum(cluster_job_route:loki_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/(.*ingester-zone.*|(loki|enterprise-logs)-write|loki-single-binary)\", route=\"/logproto.Pusher/Push\"}) / sum(cluster_job_route:loki_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/(.*ingester-zone.*|(loki|enterprise-logs)-write|loki-single-binary)\", route=\"/logproto.Pusher/Push\"})",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "Average",
"refId": "C",
"step": 10
}
],
"title": "Latency",
"type": "timeseries",
"yaxes": [
{
"format": "ms",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
}
]
}
],
"repeat": null,
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
"title": "Ingester - Zone Aware",
"titleSize": "h6"
},
{
"collapse": false,
"height": "250px",
"panels": [
{
"aliasColors": {
"1xx": "#EAB839",
"2xx": "#7EB26D",
"3xx": "#6ED0E0",
"4xx": "#EF843C",
"5xx": "#E24D42",
"OK": "#7EB26D",
"cancel": "#A9A9A9",
"error": "#E24D42",
"success": "#7EB26D"
},
"datasource": "$datasource",
"fieldConfig": {
"defaults": {
"custom": {
"drawStyle": "line",
"fillOpacity": 100,
"lineWidth": 0,
"pointSize": 5,
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "normal"
}
},
"min": 0,
"thresholds": {
"mode": "absolute",
"steps": [ ]
},
"unit": "short"
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "1xx"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "#EAB839",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byName",
"options": "2xx"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "#7EB26D",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byName",
"options": "3xx"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "#6ED0E0",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byName",
"options": "4xx"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "#EF843C",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byName",
"options": "5xx"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "#E24D42",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byName",
"options": "OK"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "#7EB26D",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byName",
"options": "cancel"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "#A9A9A9",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byName",
"options": "error"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "#E24D42",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byName",
"options": "success"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "#7EB26D",
"mode": "fixed"
}
}
]
}
]
},
"fill": 10,
"id": 7,
"linewidth": 0,
"links": [ ],
"options": {
"legend": {
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"span": 6,
"stack": true,
"targets": [
{
"expr": "sum by (status) (\n label_replace(label_replace(rate(loki_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/(.*ingester|(loki|enterprise-logs)-write|loki-single-binary)\", route=\"/logproto.Pusher/Push\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n",
"format": "time_series",
"legendFormat": "{{status}}",
"refId": "A"
}
],
"title": "QPS",
"type": "timeseries"
},
{
"datasource": "$datasource",
"fieldConfig": {
"defaults": {
"custom": {
"drawStyle": "line",
"fillOpacity": 10,
"lineWidth": 1,
"pointSize": 5,
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
}
},
"thresholds": {
"mode": "absolute",
"steps": [ ]
},
"unit": "ms"
},
"overrides": [ ]
},
"id": 8,
"links": [ ],
"nullPointMode": "null as zero",
"options": {
"legend": {
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"span": 6,
"targets": [
{
"expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/(.*ingester|(loki|enterprise-logs)-write|loki-single-binary)\", route=\"/logproto.Pusher/Push\"})) * 1e3",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "99th Percentile",
"refId": "A",
"step": 10
},
{
"expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/(.*ingester|(loki|enterprise-logs)-write|loki-single-binary)\", route=\"/logproto.Pusher/Push\"})) * 1e3",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "50th Percentile",
"refId": "B",
"step": 10
},
{
"expr": "1e3 * sum(cluster_job_route:loki_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/(.*ingester|(loki|enterprise-logs)-write|loki-single-binary)\", route=\"/logproto.Pusher/Push\"}) / sum(cluster_job_route:loki_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/(.*ingester|(loki|enterprise-logs)-write|loki-single-binary)\", route=\"/logproto.Pusher/Push\"})",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "Average",
"refId": "C",
"step": 10
}
],
"title": "Latency",
"type": "timeseries",
"yaxes": [
{
"format": "ms",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
}
]
}
],
"repeat": null,
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
"title": "Ingester",
"titleSize": "h6"
},
{
"collapse": false,
"height": "250px",
"panels": [
{
"aliasColors": {
"1xx": "#EAB839",
"2xx": "#7EB26D",
"3xx": "#6ED0E0",
"4xx": "#EF843C",
"5xx": "#E24D42",
"OK": "#7EB26D",
"cancel": "#A9A9A9",
"error": "#E24D42",
"success": "#7EB26D"
},
"datasource": "$datasource",
"fieldConfig": {
"defaults": {
"custom": {
"drawStyle": "line",
"fillOpacity": 100,
"lineWidth": 0,
"pointSize": 5,
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "normal"
}
},
"min": 0,
"thresholds": {
"mode": "absolute",
"steps": [ ]
},
"unit": "short"
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "1xx"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "#EAB839",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byName",
"options": "2xx"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "#7EB26D",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byName",
"options": "3xx"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "#6ED0E0",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byName",
"options": "4xx"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "#EF843C",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byName",
"options": "5xx"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "#E24D42",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byName",
"options": "OK"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "#7EB26D",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byName",
"options": "cancel"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "#A9A9A9",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byName",
"options": "error"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "#E24D42",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byName",
"options": "success"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "#7EB26D",
"mode": "fixed"
}
}
]
}
]
},
"fill": 10,
"id": 9,
"linewidth": 0,
"links": [ ],
"options": {
"legend": {
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"span": 6,
"stack": true,
"targets": [
{
"expr": "sum by (status) (\n label_replace(label_replace(rate(loki_index_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/(.*ingester.*|(loki|enterprise-logs)-write|loki-single-binary)\", operation=\"index_chunk\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n",
"format": "time_series",
"legendFormat": "{{status}}",
"refId": "A"
}
],
"title": "QPS",
"type": "timeseries"
},
{
"datasource": "$datasource",
"fieldConfig": {
"defaults": {
"custom": {
"drawStyle": "line",
"fillOpacity": 10,
"lineWidth": 1,
"pointSize": 5,
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
}
},
"thresholds": {
"mode": "absolute",
"steps": [ ]
},
"unit": "ms"
},
"overrides": [ ]
},
"id": 10,
"links": [ ],
"nullPointMode": "null as zero",
"options": {
"legend": {
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"span": 6,
"targets": [
{
"expr": "histogram_quantile(0.99, sum(rate(loki_index_request_duration_seconds_bucket{cluster=~\"$cluster\",job=~\"($namespace)/(.*ingester.*|(loki|enterprise-logs)-write|loki-single-binary)\", operation=\"index_chunk\"}[$__rate_interval])) by (le)) * 1e3",
"expr": "histogram_quantile(0.99, sum(rate(loki_boltdb_shipper_request_duration_seconds_bucket{cluster=~\"$cluster\",job=~\"($namespace)/(loki|enterprise-logs)-write\", operation=\"WRITE\"}[$__rate_interval])) by (le)) * 1e3",
"format": "time_series",
"legendFormat": "99th Percentile",
"refId": "A"
},
{
"expr": "histogram_quantile(0.50, sum(rate(loki_index_request_duration_seconds_bucket{cluster=~\"$cluster\",job=~\"($namespace)/(.*ingester.*|(loki|enterprise-logs)-write|loki-single-binary)\", operation=\"index_chunk\"}[$__rate_interval])) by (le)) * 1e3",
"expr": "histogram_quantile(0.50, sum(rate(loki_boltdb_shipper_request_duration_seconds_bucket{cluster=~\"$cluster\",job=~\"($namespace)/(loki|enterprise-logs)-write\", operation=\"WRITE\"}[$__rate_interval])) by (le)) * 1e3",
"format": "time_series",
"legendFormat": "50th Percentile",
"refId": "B"
},
{
"expr": "sum(rate(loki_index_request_duration_seconds_sum{cluster=~\"$cluster\",job=~\"($namespace)/(.*ingester.*|(loki|enterprise-logs)-write|loki-single-binary)\", operation=\"index_chunk\"}[$__rate_interval])) * 1e3 / sum(rate(loki_index_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/(.*ingester.*|(loki|enterprise-logs)-write|loki-single-binary)\", operation=\"index_chunk\"}[$__rate_interval]))",
"format": "time_series",
"legendFormat": "Average",
"refId": "C"
}
],
"title": "Latency",
"type": "timeseries",
"yaxes": [
{
"format": "ms",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
}
]
}
],
"repeat": null,
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
"title": "Index",
"titleSize": "h6"
},
{
"collapse": false,
"height": "250px",
"panels": [
{
"aliasColors": {
"1xx": "#EAB839",
"2xx": "#7EB26D",
"3xx": "#6ED0E0",
"4xx": "#EF843C",
"5xx": "#E24D42",
"OK": "#7EB26D",
"cancel": "#A9A9A9",
"error": "#E24D42",
"success": "#7EB26D"
},
"datasource": "$datasource",
"fieldConfig": {
"defaults": {
"custom": {
"drawStyle": "line",
"fillOpacity": 100,
"lineWidth": 0,
"pointSize": 5,
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "normal"
}
},
"min": 0,
"thresholds": {
"mode": "absolute",
"steps": [ ]
},
"unit": "short"
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "1xx"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "#EAB839",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byName",
"options": "2xx"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "#7EB26D",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byName",
"options": "3xx"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "#6ED0E0",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byName",
"options": "4xx"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "#EF843C",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byName",
"options": "5xx"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "#E24D42",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byName",
"options": "OK"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "#7EB26D",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byName",
"options": "cancel"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "#A9A9A9",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byName",
"options": "error"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "#E24D42",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byName",
"options": "success"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "#7EB26D",
"mode": "fixed"
}
}
]
}
]
},
"fill": 10,
"id": 11,
"linewidth": 0,
"links": [ ],
"options": {
"legend": {
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"span": 6,
"stack": true,
"targets": [
{
"expr": "sum by (status) (\n label_replace(label_replace(rate(loki_boltdb_shipper_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/(.*ingester|(loki|enterprise-logs)-write|loki-single-binary)\", operation=\"WRITE\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n",
"format": "time_series",
"legendFormat": "{{status}}",
"refId": "A"
}
],
"title": "QPS",
"type": "timeseries"
},
{
"datasource": "$datasource",
"fieldConfig": {
"defaults": {
"custom": {
"drawStyle": "line",
"fillOpacity": 10,
"lineWidth": 1,
"pointSize": 5,
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
}
},
"thresholds": {
"mode": "absolute",
"steps": [ ]
},
"unit": "ms"
},
"overrides": [ ]
},
"id": 12,
"links": [ ],
"nullPointMode": "null as zero",
"options": {
"legend": {
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"span": 6,
"targets": [
{
"expr": "histogram_quantile(0.99, sum(rate(loki_boltdb_shipper_request_duration_seconds_bucket{cluster=~\"$cluster\",job=~\"($namespace)/(.*ingester|(loki|enterprise-logs)-write|loki-single-binary)\", operation=\"WRITE\"}[$__rate_interval])) by (le)) * 1e3",
"format": "time_series",
"legendFormat": "99th Percentile",
"refId": "A"
},
{
"expr": "histogram_quantile(0.50, sum(rate(loki_boltdb_shipper_request_duration_seconds_bucket{cluster=~\"$cluster\",job=~\"($namespace)/(.*ingester|(loki|enterprise-logs)-write|loki-single-binary)\", operation=\"WRITE\"}[$__rate_interval])) by (le)) * 1e3",
"format": "time_series",
"legendFormat": "50th Percentile",
"refId": "B"
},
{
"expr": "sum(rate(loki_boltdb_shipper_request_duration_seconds_sum{cluster=~\"$cluster\",job=~\"($namespace)/(.*ingester|(loki|enterprise-logs)-write|loki-single-binary)\", operation=\"WRITE\"}[$__rate_interval])) * 1e3 / sum(rate(loki_boltdb_shipper_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/(.*ingester|(loki|enterprise-logs)-write|loki-single-binary)\", operation=\"WRITE\"}[$__rate_interval]))",
"expr": "sum(rate(loki_boltdb_shipper_request_duration_seconds_sum{cluster=~\"$cluster\",job=~\"($namespace)/(loki|enterprise-logs)-write\", operation=\"WRITE\"}[$__rate_interval])) * 1e3 / sum(rate(loki_boltdb_shipper_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/(loki|enterprise-logs)-write\", operation=\"WRITE\"}[$__rate_interval]))",
"format": "time_series",
"legendFormat": "Average",
"refId": "C"

View File

@@ -1,3 +1,4 @@
groups:
- name: "loki_rules"
rules:
- expr: "histogram_quantile(0.99, sum(rate(loki_request_duration_seconds_bucket[5m]))

View File

@@ -6,15 +6,6 @@
{{- join ", " $list }}
{{- end }}
{{- define "agent.all_namespaces" -}}
{{- $list := list }}
{{- range .Values.namespacesToMonitor }}
{{- $list = append $list (printf "%s" .) }}
{{- end }}
{{- $list = append $list .Release.Namespace }}
{{- join "|" $list }}
{{- end }}
{{- define "agent.loki_write_targets" -}}
{{- $list := list }}
{{- if .Values.local.logs.enabled }}

View File

@@ -40,12 +40,10 @@ data:
{{- if or .Values.local.logs.enabled .Values.cloud.logs.enabled }}
// Logs
{{- if .Values.cloud.logs.enabled }}
remote.kubernetes.secret "logs_credentials" {
namespace = "{{- $.Release.Namespace -}}"
name = "{{- .Values.cloud.logs.secret -}}"
}
{{- end }}
loki.source.kubernetes "pods" {
clustering {
@@ -82,12 +80,10 @@ data:
{{- if or .Values.local.metrics.enabled .Values.cloud.metrics.enabled }}
// Metrics
{{- if .Values.cloud.metrics.enabled }}
remote.kubernetes.secret "metrics_credentials" {
namespace = "{{- $.Release.Namespace -}}"
name = "{{- .Values.cloud.metrics.secret -}}"
}
{{- end }}
discovery.kubernetes "metric_pods" {
role = "pod"
@@ -141,13 +137,6 @@ data:
action = "keep"
}
rule {
source_labels = ["namespace"]
regex = "{{ include "agent.all_namespaces" . }}"
action = "keep"
}
forward_to = [ {{ include "agent.prometheus_write_targets" . }} ]
}
{{- if .Values.kubeStateMetrics.enabled }}
@@ -165,10 +154,6 @@ data:
// Based on https://github.com/Chewie/loutretelecom-manifests/blob/main/manifests/addons/monitoring/config.river
discovery.kubernetes "all_nodes" {
role = "node"
namespaces {
own_namespace = true
names = [ {{ include "agent.namespaces" . }} ]
}
}
discovery.relabel "all_nodes" {
@@ -282,12 +267,10 @@ data:
{{- if or .Values.local.traces.enabled .Values.cloud.traces.enabled }}
// Traces
{{- if .Values.cloud.traces.enabled }}
remote.kubernetes.secret "traces_credentials" {
namespace = "{{- $.Release.Namespace -}}"
name = "{{- .Values.cloud.traces.secret -}}"
}
{{- end }}
// Shamelessly copied from https://github.com/grafana/intro-to-mlt/blob/main/agent/config.river
otelcol.receiver.otlp "otlp_receiver" {
@@ -322,7 +305,7 @@ data:
{{- if .Values.local.logs.enabled }}
loki.write "local" {
endpoint {
url = "http://{{- .Release.Namespace -}}-loki-gateway.{{- .Release.Namespace -}}.svc.cluster.local:80/loki/api/v1/push"
url = "http://loki-gateway.{{- .Release.Namespace -}}.svc.cluster.local:80/loki/api/v1/push"
}
}
{{- end }}

View File

@@ -32,7 +32,7 @@ data:
uid: loki_ds
# <string> Sets the data source's URL, including the
# port.
url: http://{{- $.Release.Namespace -}}-loki-gateway.{{- $.Release.Namespace -}}.svc.cluster.local
url: http://loki-gateway.{{- $.Release.Namespace -}}.svc.cluster.local
# <bool> Toggles whether the data source is pre-selected
# for new panels. You can set only one default
# data source per organization.

View File

@@ -1,11 +1,12 @@
# Specify the namespaces to monitor here
namespacesToMonitor:
- loki
- mimir
- tempo
# The name of the cluster where this will be installed
clusterLabelValue: "meta-monitoring"
# Set to true to write logs, metrics or traces to Grafana Cloud
# The secrets have to be created first
cloud:
logs:
enabled: true
@@ -103,9 +104,7 @@ metrics:
- go_memstats_heap_inuse_bytes
- kubelet_volume_stats_used_bytes
- kubelet_volume_stats_capacity_bytes
- kube_deployment_created
- kube_persistentvolumeclaim_labels
- kube_pod_container_info
- kube_pod_container_resource_requests
- kube_pod_container_status_last_terminated_reason
- kube_pod_container_status_restarts_total
@@ -138,7 +137,6 @@ metrics:
- loki_distributor_bytes_received_total
- loki_distributor_lines_received_total
- loki_distributor_structured_metadata_bytes_received_total
- loki_index_request_duration_seconds_count
- loki_ingester_chunk_age_seconds_bucket
- loki_ingester_chunk_age_seconds_count
- loki_ingester_chunk_age_seconds_sum
@@ -204,15 +202,6 @@ kubeStateMetrics:
loki:
loki:
auth_enabled: false
schemaConfig:
configs:
- from: 2024-03-29
store: tsdb
object_store: s3
schema: v13
index:
prefix: index_
period: 24h
storage:
type: "s3"
s3:
@@ -230,13 +219,8 @@ loki:
secret_access_key: "{{ .Values.global.minio.rootPassword }}"
compactor:
retention_enabled: true
delete_request_store: s3
limits_config:
retention_period: 30d
lokiCanary:
enabled: false
test:
enabled: false
monitoring:
dashboards:
enabled: false

View File

@@ -1,10 +0,0 @@
# Create a new release
1. Update the version field in charts/meta-monitoring/Chart.yaml in a new PR. Merge this PR if approved.
2. On the [Actions tab](https://github.com/grafana/meta-monitoring-chart/actions):
- Select `Release Helm chart` in the workflows on the left
- Click the `Run workflow` button
- Leave the `main` branch as is
- Click the green `Run workflow` button

View File

@@ -1,19 +1,5 @@
# Install this chart
## Preparation for Cloud mode (preferred)
1. Use an existing Grafana Cloud account or set up a new one. Then create an access token:
1. In Grafana go to Administration -> Users and Access -> Cloud access policies.
1. Click `Create access policy`.
1. Fill in the `Display name` field and check the `Write` check box for metrics, logs and traces. Then click `Create`.
1. On the newly created access policy click `Add token`.
1. Fill in the `Token name` field and click `Create`. Make a copy of the token as it will be used later on.
1. Create the meta namespace
```
@@ -25,100 +11,32 @@
```
kubectl create secret generic logs -n meta \
--from-literal=username=<logs username> \
--from-literal=password=<token>
--from-literal=password=<logs password> \
--from-literal=endpoint='https://logs-prod-us-central1.grafana.net/loki/api/v1/push'
kubectl create secret generic metrics -n meta \
--from-literal=username=<metrics username> \
--from-literal=password=<token>
--from-literal=password=<metrics password> \
--from-literal=endpoint='https://prometheus-us-central1.grafana.net/api/prom/push'
kubectl create secret generic traces -n meta \
--from-literal=username=<traces username> \
--from-literal=password=<token>
--from-literal=password=<traces password> \
--from-literal=endpoint='https://tempo-us-central1.grafana.net/tempo'
```
The logs, metrics and traces usernames are the `User / Username / Instance IDs` of the Loki, Prometheus/Mimir and Tempo instances in Grafana Cloud. From `Home` in Grafana, click `Stacks`, then open the `Details` page of each of the Loki, Prometheus/Mimir and Tempo instances. A quick way to verify the secrets is sketched after this list.
1. Create a values.yaml file based on the [default one](../charts/meta-monitoring/values.yaml). Fill in the names of the secrets created above as needed. An example minimal values.yaml looks like this:
```
namespacesToMonitor:
- loki
cloud:
logs:
enabled: true
secret: "logs"
metrics:
enabled: true
secret: "metrics"
traces:
enabled: true
secret: "traces"
```
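Before moving on, a quick way to confirm the three secrets exist (an illustrative check, not something the chart requires):
```
kubectl get secrets logs metrics traces -n meta
```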
## Preparation for Local mode
1. Create the meta namespace
```
kubectl create namespace meta
```
1. Create a values.yaml file based on the [default one](../charts/meta-monitoring/values.yaml). An example minimal values.yaml looks like this:
```
namespacesToMonitor:
- loki
cloud:
logs:
enabled: false
metrics:
enabled: false
traces:
enabled: false
local:
grafana:
enabled: true
logs:
enabled: true
metrics:
enabled: true
traces:
enabled: true
minio:
enabled: true
```
## Installing the chart
1. Add the repo
```
helm repo add grafana https://grafana.github.io/helm-charts
```
1. Fetch the latest charts from the grafana repo
```
helm repo update grafana
```
1. Create a values.yaml file based on the [default one](../charts/meta-monitoring/values.yaml). Fill in the names of the secrets created above as needed.
1. Install this helm chart (a quick post-install check is sketched after this list)
```
helm install -n meta -f values.yaml meta grafana/meta-monitoring
helm install -n meta -f values.yaml meta ./charts/meta-monitoring
```
1. Upgrade
```
helm upgrade --install -f values.yaml -n meta meta grafana/meta-monitoring
helm upgrade --install -f values.yaml -n meta meta ./charts/meta-monitoring
```
1. Delete this chart:
@@ -126,41 +44,3 @@
```
helm delete -n meta meta
```
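A quick post-install sanity check, assuming the `meta` release name and namespace used in the commands above (illustrative only):
```
helm list -n meta
kubectl get pods -n meta
```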
## Installing the dashboards and rules on Grafana Cloud
## Installing the dashboards on Grafana Cloud
Only the dashboard files for the monitored application have to be copied. When monitoring Loki, import the dashboard files starting with 'loki-'.
For each of the dashboard files in the charts/meta-monitoring/src/dashboards folder, do the following:
1. Click on 'Dashboards' in Grafana
1. Click the 'New' button and select 'Import'
1. Drop the dashboard file onto the 'Upload dashboard JSON file' drop area
1. Click 'Import'
## Installing the rules on Grafana Cloud
1. Select the rules files in charts/meta-monitoring/src/rules for the application to monitor. When monitoring Loki, use loki-rules.yaml.
1. Install mimirtool as per the [instructions](https://grafana.com/docs/mimir/latest/manage/tools/mimirtool/)
1. Create an access policy with Read and Write permissions for Rules. Also create a token and record it.
1. Get your cloud Prometheus endpoint and Instance ID from the `Prometheus` page in `Stacks`.
1. Use them to load the rules with mimirtool as follows (a filled-in example is sketched after this list):
```
mimirtool rules load --address=<your_cloud_prometheus_endpoint> --id=<your_instance_id> --key=<your_cloud_access_policy_token> *.yaml
```
1. To check the rules you have uploaded, run:
```
mimirtool rules print --address=<your_cloud_prometheus_endpoint> --id=<your_instance_id> --key=<your_cloud_access_policy_token>
```
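For reference, this is what the load command could look like with the placeholders filled in; the address and instance ID below are made-up illustrative values, so substitute your own:
```
mimirtool rules load \
  --address=https://prometheus-us-central1.grafana.net \
  --id=123456 \
  --key=<your_cloud_access_policy_token> \
  loki-rules.yaml
```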

View File

@@ -1,20 +0,0 @@
#!/usr/bin/env bash
clean_up() {
test -d "$tmp_dir" && rm -fr "$tmp_dir"
}
here=${PWD}
tmp_dir=$( mktemp -d -t my-script )
cd $tmp_dir
echo "Cloning Loki"
git clone --filter=blob:none --no-checkout "https://github.com/grafana/loki"
cd loki
git sparse-checkout init --cone
git checkout main
git sparse-checkout set production/loki-mixin
echo "Copying production/loki-mixin to ${here}"
cp -r production ${here}

View File

@@ -1,18 +0,0 @@
(import 'dashboards.libsonnet') +
(import 'alerts.libsonnet') +
(import 'recording_rules.libsonnet') + {
grafanaDashboardFolder: 'Loki Meta Monitoring',
_config+:: {
internal_components: false,
// The Meta Monitoring helm chart uses Grafana Alloy instead of promtail
promtail+: {
enabled: false,
},
meta_monitoring+: {
enabled: true,
},
},
}