Compare commits
116 commits: add_more_m ... main

SHA1:
a425aaae44 7f462a85d5 ee54e6d33c c8bac2f25d 9d568e2e16 5752b22217
66273bee9d 5b1eba118f 3b76b884c3 13e7117426 c4b411ce7a 4bfd6e4bdf
dde09419fa 35ad1be7fb 31cfe2dccc da71bb7e76 5ff6666aec acd4385c80
23454a9f63 177cfa08fc 781a47b2db 79fbf8028b 9a9330bdc0 20e79b7530
dc4f93c1a3 7642e91b93 e83423c179 f5fe732847 eb89247281 d0f751ec23
055e5c9548 932ead2ce7 01866c3064 d83dd67095 5b6c2245fa 33bce4a44f
58f5aff348 d1b74453a8 bebc604fc6 54e69cb421 b815f4749d dcb2b8c8c4
99be3e6cb5 11257ee600 831dcb624c 80c6a1e344 02237a1f28 bcac10ca74
9a3d0b51d8 df38407fb3 0914919499 918b6b9cb4 55f3424118 98e5ecd887
58b438cdb5 c4af598b75 c78fe2d9fa bae6e28b51 8f38e9508f de8a87dea1
48fad9f387 4ec5f08646 a1b66f0cd4 34bbe47d75 0ef850e96c c91a819e77
71462a9f93 c5f1daf8f0 952c3e85d9 f6b72897cd 8b6314fde3 4d42fb664d
9457c25ced ca686afc3e 4b01214225 0e63a86fe5 4e8b2be044 df12d96f9c
fcb5de6793 661662caec 2a681ce1eb 52e4516e04 95085c4e72 55d3c9d723
618ab3778b 89d9bdb5e2 291f680c16 3658769c7a 1be9bc8d0a 81d63a4383
333ba3a3fd 7aa091cbf8 d309a5bc50 346dd4968e f5c9fa0593 d5e8df856d
2d85e7e120 1a4a1ad885 c1ff364c29 bd0ef0e2cc 0216163885 c42718649f
650df8217a f7946ff713 b312fc37fc ad96f09600 090f1ef91a b2957d90f0
f8aea814c5 91c19f07d3 315b203082 caf4eda1be 21ba3ebe8c f0a934a393
941420b417 1ea10cdbfa
.github/configs/cluster-config.yaml (vendored, new file, 19 lines)
@@ -0,0 +1,19 @@
+apiVersion: kind.x-k8s.io/v1alpha4
+kind: Cluster
+nodes:
+  - role: control-plane
+    kubeadmConfigPatches:
+      - |
+        kind: ClusterConfiguration
+        controllerManager:
+          extraArgs:
+            bind-address: 0.0.0.0
+            secure-port: "10257"
+        scheduler:
+          extraArgs:
+            bind-address: 0.0.0.0
+            secure-port: "10259"
+      - |
+        kind: KubeProxyConfiguration
+        metricsBindAddress: 0.0.0.0:10249
+  - role: worker
@@ -19,6 +19,9 @@ jobs:
   updateVersions:
     name: Update the subcharts
     runs-on: "ubuntu-latest"
+    permissions:
+      contents: write
+      id-token: write
     steps:
       - name: Checkout
         uses: actions/checkout@v2
@@ -66,6 +69,20 @@ jobs:
             echo "changed=true" >> "${GITHUB_OUTPUT}"
           fi
+
+      - id: get-secrets
+        uses: grafana/shared-workflows/actions/get-vault-secrets@main
+        with:
+          # Secrets placed in the ci/repo/grafana/<repo>/<path> path in Vault
+          repo_secrets: |
+            APP_ID=github-app:app-id
+            PRIVATE_KEY=github-app:private-key
+
+      - uses: actions/create-github-app-token@v1
+        id: app-token
+        with:
+          app-id: ${{ env.APP_ID }}
+          private-key: ${{ env.PRIVATE_KEY }}

       - name: Create pull request
         if: steps.update-loki.outputs.changed == 'true' || steps.update-grafana-alloy.outputs.changed == 'true' || steps.update-mimir-distributed.outputs.changed == 'true' || steps.update-tempo-distributed.outputs.changed == 'true' || steps.update-minio.outputs.changed == 'true'
         uses: peter-evans/create-pull-request@v5
@@ -79,10 +96,15 @@ jobs:
           labels: dependencies
           branch: chore/update-dependencies
           delete-branch: true
+          team-reviewers: "@grafana/loki-squad"
+          token: ${{ steps.app-token.outputs.token }}

   updateGrafana:
     name: Update the Grafana version
     runs-on: "ubuntu-latest"
+    permissions:
+      contents: write
+      id-token: write
     steps:
       - name: Checkout
         uses: actions/checkout@v2
@@ -98,6 +120,20 @@ jobs:
             echo "changed=true" >> "${GITHUB_OUTPUT}"
          fi
+
+      - id: get-secrets
+        uses: grafana/shared-workflows/actions/get-vault-secrets@main
+        with:
+          # Secrets placed in the ci/repo/grafana/<repo>/<path> path in Vault
+          repo_secrets: |
+            APP_ID=github-app:app-id
+            PRIVATE_KEY=github-app:private-key
+
+      - uses: actions/create-github-app-token@v1
+        id: app-token
+        with:
+          app-id: ${{ env.APP_ID }}
+          private-key: ${{ env.PRIVATE_KEY }}

       - name: Create pull request
         if: steps.update-grafana.outputs.changed == 'true'
         uses: peter-evans/create-pull-request@v5
@@ -111,3 +147,5 @@ jobs:
           labels: dependencies
           branch: chore/update-minio
           delete-branch: true
+          team-reviewers: "@grafana/loki-squad"
+          token: ${{ steps.app-token.outputs.token }}
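The hunks above wire the update jobs to a GitHub App instead of the default workflow token: credentials are read from Vault with grafana/shared-workflows' get-vault-secrets action, exchanged for a short-lived installation token via actions/create-github-app-token, and that token is handed to peter-evans/create-pull-request. A minimal sketch of the same flow in isolation; the workflow name, trigger, and checkout step are placeholders, while the secret paths, action references, and PR settings come from the diff:

name: update-example            # placeholder name, not part of the diff
on: workflow_dispatch           # placeholder trigger
jobs:
  update:
    runs-on: ubuntu-latest
    permissions:
      contents: write
      id-token: write                 # needed for the Vault OIDC exchange
    steps:
      - uses: actions/checkout@v2
      # 1. Read the GitHub App credentials from Vault.
      - id: get-secrets
        uses: grafana/shared-workflows/actions/get-vault-secrets@main
        with:
          repo_secrets: |
            APP_ID=github-app:app-id
            PRIVATE_KEY=github-app:private-key
      # 2. Exchange them for an installation token.
      - uses: actions/create-github-app-token@v1
        id: app-token
        with:
          app-id: ${{ env.APP_ID }}
          private-key: ${{ env.PRIVATE_KEY }}
      # 3. Open the PR with the app token so team reviewers can be requested.
      - name: Create pull request
        uses: peter-evans/create-pull-request@v5
        with:
          token: ${{ steps.app-token.outputs.token }}
          branch: chore/update-dependencies
          labels: dependencies
          delete-branch: true
          team-reviewers: "@grafana/loki-squad"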
.github/workflows/helm-ci.yml (vendored, 11 changed lines)
@@ -1,6 +1,7 @@
 ---
 name: helm-ci
 on:
+  workflow_dispatch:
   pull_request:
     paths:
       - "charts/meta-monitoring/**"
@@ -24,7 +25,7 @@ jobs:
 #     runs-on: ubuntu-latest
 #     steps:
 #       - name: Checkout
-#         uses: actions/checkout@v3
+#         uses: actions/checkout@v4
 #         with:
 #           fetch-depth: 0

@@ -38,10 +39,10 @@ jobs:
 #       - name: Set up Python
 #         uses: actions/setup-python@v4
 #         with:
-#           python-version: 3.7
+#           python-version: 3.9

 #       - name: Set up chart-testing
-#         uses: helm/chart-testing-action@v2.4.0
+#         uses: helm/chart-testing-action@v2

 #       - name: Run chart-testing (list-changed)
 #         id: list-changed
@@ -55,10 +56,10 @@ jobs:
 #         run: ct lint --config "${CT_CONFIGFILE}" --check-version-increment=false

 #       - name: Create kind cluster
-#         uses: helm/kind-action@v1.8.0
+#         uses: helm/kind-action@v1
 #         if: steps.list-changed.outputs.changed == 'true'
 #         with:
-#           config: tools/kind.config
+#           config: "${{ github.workspace }}/.github/configs/cluster-config.yaml"

 #       - name: Run chart-testing (install)
 #         run: |
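The commented-out CI steps above now point chart-testing's kind cluster at the vendored .github/configs/cluster-config.yaml added earlier in this diff. A sketch of that step as it would look once re-enabled; the action reference, condition, and config path come from the diff, and the surrounding job is assumed:

      - name: Create kind cluster
        uses: helm/kind-action@v1
        if: steps.list-changed.outputs.changed == 'true'
        with:
          config: "${{ github.workspace }}/.github/configs/cluster-config.yaml"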
README.md (16 changed lines)
@@ -1,8 +1,6 @@
 # meta-monitoring-chart

-This is a meta-monitoring chart for Loki.
+This is a meta-monitoring chart for Loki, specifically Loki installed via the Loki helm chart.

-Note that this is pre-production software at the moment.
-
 ## Local and cloud modes
@@ -11,19 +9,15 @@ to small Loki, Mimir and Tempo installations running in the meta-monitoring name

-
+

-To enable local mode set `local.<logs|metrics|traces>.enabled` to true.
-
 In the cloud mode the logs, metrics and/or traces are sent to Grafana Cloud.

-
+

-To enable cloud mode set `cloud.<logs|metrics|traces>.enabled` to true. The `endpoint`, `username` and `password` settings for your Grafana Cloud logs, metrics and traces instances have to be filled in as well.
-
 Both modes can be enabled at the same time. Cloud mode is preferred.

 ## Installation

-For more instructions including how to update the chart go to the [installation](docs/installation.md) page.
+For more instructions including how to install the chart go to the [installation](docs/installation.md) page.

 ## Supported features
@@ -33,8 +27,7 @@ For more instructions including how to update the chart go to the [installation]
 - Specify PII regexes that are applied to logs before they are sent to Loki (cloud or local). The capture group in the regex is replaced with *****.
 - a Grafana instance is installed (when local mode is used) with the relevant datasources installed. The following dashboards are installed:
   - logs dashboards
-  - agent dashboards
-- Retention is set to 24 hours
+  - Alloy dashboards

 Most of these features are enabled by default. See the values.yaml file for how to enable/disable them.
@@ -42,8 +35,7 @@ Most of these features are enabled by default. See the values.yaml file for how

 - This has not been tested on Openshift yet.
 - The underlying Loki, Mimir and Tempo are at the default size installed by the Helm chart. This might need changing when monitoring bigger Loki, Mimir or Tempo installations.
-- MinIO is used as storage at the moment with a limited retention. At the moment this chart cannot be used for monitoring over longer periods.
-- Agent self monitoring is not done at the moment.
+- MinIO is used as storage for the local mode at the moment with a limited retention. At the moment this chart cannot be used for monitoring over longer periods.

 ## Developer help topics
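The README lines removed above are where the local/cloud toggles were spelled out. A values sketch of those switches, assuming the `local.*`/`cloud.*` key layout the README text describes; the endpoint, username, and password values are placeholders, not taken from the chart:

local:
  logs:
    enabled: true        # keep logs in the in-cluster Loki
  metrics:
    enabled: false
  traces:
    enabled: false
cloud:
  logs:
    enabled: true        # also ship logs to Grafana Cloud
    endpoint: https://logs-example.grafana.net/loki/api/v1/push   # placeholder
    username: "123456"                                            # placeholder
    password: "<access-policy-token>"                             # placeholder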
@@ -1,18 +1,18 @@
 dependencies:
 - name: loki
   repository: https://grafana.github.io/helm-charts
-  version: 6.5.0
+  version: 6.29.0
 - name: alloy
   repository: https://grafana.github.io/helm-charts
-  version: 0.1.1
+  version: 0.12.5
 - name: mimir-distributed
   repository: https://grafana.github.io/helm-charts
-  version: 5.3.0
+  version: 5.6.0
 - name: tempo-distributed
   repository: https://grafana.github.io/helm-charts
-  version: 1.9.9
+  version: 1.33.0
 - name: minio
   repository: https://charts.min.io
-  version: 5.2.0
+  version: 5.4.0
-digest: sha256:5328702b5f6b0487aba8f7bc77d6abfcd5e094569e9205cd725971e3e31255dd
+digest: sha256:5225a03d9003384639f5d43b1971126371269347f16f221b7aed377ab85d71be
-generated: "2024-05-08T07:03:21.797461955Z"
+generated: "2025-03-27T07:03:11.17404081Z"
@@ -13,7 +13,7 @@ type: application
 # This is the chart version. This version number should be incremented each time you make changes
 # to the chart and its templates, including the app version.
 # Versions are expected to follow Semantic Versioning (https://semver.org/)
-version: 0.0.2
+version: 1.3.0
 # This is the version number of the application being deployed. This version number should be
 # incremented each time you make changes to the application. Versions are not expected to
 # follow Semantic Versioning. They should reflect the version the application is using.
@@ -22,20 +22,20 @@ appVersion: "0.0.1"
 dependencies:
   - name: loki
     repository: https://grafana.github.io/helm-charts
-    version: 6.5.0
+    version: 6.29.0
     condition: local.logs.enabled
   - name: alloy
     repository: https://grafana.github.io/helm-charts
-    version: 0.1.1
+    version: 0.12.5
   - name: mimir-distributed
     repository: https://grafana.github.io/helm-charts
-    version: 5.3.0
+    version: 5.6.0
     condition: local.metrics.enabled
   - name: tempo-distributed
     repository: https://grafana.github.io/helm-charts
-    version: 1.9.9
+    version: 1.33.0
     condition: local.traces.enabled
   - name: minio
     repository: https://charts.min.io
-    version: 5.2.0
+    version: 5.4.0
     condition: local.minio.enabled
Binary files changed (contents not shown):
BIN  charts/meta-monitoring/charts/alloy-0.12.5.tgz (new file)
BIN  charts/meta-monitoring/charts/loki-6.29.0.tgz (new file)
BIN  charts/meta-monitoring/charts/mimir-distributed-5.6.0.tgz (new file)
BIN  charts/meta-monitoring/charts/minio-5.4.0.tgz (new file)
BIN  charts/meta-monitoring/charts/tempo-distributed-1.33.0.tgz (new file)
BIN  several other chart archives changed; their names are not listed in this view
@@ -1824,7 +1824,7 @@
-  "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", pod=~\"(.*distributor.*|(loki|enterprise-logs)-write)\"}[$__rate_interval]))",
+  "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", pod=~\"(.*distributor.*|(loki|enterprise-logs)-write.*|$namespace-[0-9]+)\"}[$__rate_interval]))",
@@ -1921,7 +1921,7 @@
-  "expr": "go_memstats_heap_inuse_bytes{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"(.*/distributor|(loki|enterprise-logs)-write|.*/loki)\"}",
+  "expr": "go_memstats_heap_inuse_bytes{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"(.*/.*distributor|$namespace/(loki|enterprise-logs)-write|.*/loki|$namespace/loki-single-binary)\"}",
@@ -2525,7 +2525,7 @@
-  "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", pod=~\"(.*ingester.*|(loki|enterprise-logs)-write|loki-single-binary)\"}[$__rate_interval]))",
+  "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", pod=~\"(.*ingester.*|(loki|enterprise-logs)-write.*|loki-single-binary|$namespace-[0-9]+)\"}[$__rate_interval]))",
@@ -2622,7 +2622,7 @@
-  "expr": "go_memstats_heap_inuse_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"(.*ingester.*|(loki|enterprise-logs)-write|loki-single-binary)\"}",
+  "expr": "go_memstats_heap_inuse_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"(.*ingester.*|(loki|enterprise-logs)-write.*|loki-single-binary|$namespace-[0-9]+)\"}",
@@ -3308,7 +3308,7 @@
-  "expr": "sum by(reason) (rate(loki_ingester_chunks_flushed_total{cluster=~\"$cluster\",job=~\"$namespace/.*ingester.*\", namespace=~\"$namespace\"}[$__rate_interval])) / ignoring(reason) group_left sum(rate(loki_ingester_chunks_flushed_total{cluster=~\"$cluster\",job=~\"$namespace/.*ingester.*\", namespace=~\"$namespace\"}[$__rate_interval]))",
+  "expr": "sum by(reason) (rate(loki_ingester_chunks_flushed_total{cluster=~\"$cluster\",job=~\"($namespace)/(.*ingester.*|(loki|enterprise-logs)-write|loki-single-binary)\", namespace=~\"$namespace\"}[$__rate_interval])) / ignoring(reason) group_left sum(rate(loki_ingester_chunks_flushed_total{cluster=~\"$cluster\",job=~\"($namespace)/(.*ingester.*|(loki|enterprise-logs)-write|loki-single-binary)\", namespace=~\"$namespace\"}[$__rate_interval]))",
@@ -3388,7 +3388,7 @@
-  "expr": "sum by (le) (rate(loki_ingester_chunk_utilization_bucket{cluster=\"$cluster\", job=~\"($namespace)/(ingester|(loki|enterprise-logs)-write|loki-single-binary)\"}[$__rate_interval]))",
+  "expr": "sum by (le) (rate(loki_ingester_chunk_utilization_bucket{cluster=\"$cluster\", job=~\"($namespace)/(.*ingester|(loki|enterprise-logs)-write|loki-single-binary)\"}[$__rate_interval]))",
@@ -3481,7 +3481,7 @@
-  "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", pod=~\"(.*querier.*|(loki|enterprise-logs)-read|loki-single-binary)\"}[$__rate_interval]))",
+  "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", pod=~\"(.*querier.*|(loki|enterprise-logs)-read.*|loki-single-binary|$namespace-[0-9]+)\"}[$__rate_interval]))",
@@ -3578,7 +3578,7 @@
-  "expr": "go_memstats_heap_inuse_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"(.*querier.*|(loki|enterprise-logs)-read|.*loki-single-binary)\"}",
+  "expr": "go_memstats_heap_inuse_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"(.*querier.*|(loki|enterprise-logs)-read.*|.*loki-single-binary|$namespace-[0-9]+)\"}",
@@ -104,19 +104,19 @@
-  "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-frontend\"}[$__rate_interval]))",
+  "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-frontend|loki\", pod=~\"query-frontend|loki-read-.*|$namespace-[0-9]*\"}[$__rate_interval]))",
-  "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-frontend\", resource=\"cpu\"} > 0)",
+  "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-frontend|loki\", pod=~\"query-frontend|loki-read-.*|$namespace-[0-9]*\", resource=\"cpu\"} > 0)",
-  "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-frontend\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-frontend\"})",
+  "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-frontend|loki\", pod=~\"query-frontend|loki-read-.*|$namespace-[0-9]*\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-frontend|loki\", pod=~\"query-frontend|loki-read-.*|$namespace-[0-9]*\"})",
@@ -206,19 +206,19 @@
-  "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-frontend\"})",
+  "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-frontend|loki\", pod=~\"query-frontend|loki-read-.*|$namespace-[0-9]*\"})",
-  "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-frontend\", resource=\"memory\"} > 0)",
+  "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-frontend|loki\", pod=~\"query-frontend|loki-read-.*|$namespace-[0-9]*\", resource=\"memory\"} > 0)",
-  "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-frontend\"} > 0)",
+  "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-frontend|loki\", pod=~\"query-frontend|loki-read-.*|$namespace-[0-9]*\"} > 0)",
@@ -269,7 +269,7 @@
-  "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/.*query-frontend\"})",
+  "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/(.*query-frontend|loki-read|loki-single-binary)\"})",
@@ -371,19 +371,19 @@
-  "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-scheduler\"}[$__rate_interval]))",
+  "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-scheduler|loki\", pod=~\"query-scheduler|loki-read-.*|$namespace-[0-9]*\"}[$__rate_interval]))",
-  "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-scheduler\", resource=\"cpu\"} > 0)",
+  "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-scheduler|loki\", pod=~\"query-scheduler|loki-read-.*|$namespace-[0-9]*\", resource=\"cpu\"} > 0)",
-  "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-scheduler\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-scheduler\"})",
+  "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-scheduler|loki\", pod=~\"query-scheduler|loki-read-.*|$namespace-[0-9]*\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-scheduler|loki\", pod=~\"query-scheduler|loki-read-.*|$namespace-[0-9]*\"})",
@@ -473,19 +473,19 @@
-  "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-scheduler\"})",
+  "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-scheduler|loki\", pod=~\"query-scheduler|loki-read-.*|$namespace-[0-9]*\"})",
-  "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-scheduler\", resource=\"memory\"} > 0)",
+  "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-scheduler|loki\", pod=~\"query-scheduler|loki-read-.*|$namespace-[0-9]*\", resource=\"memory\"} > 0)",
-  "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-scheduler\"} > 0)",
+  "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-scheduler|loki\", pod=~\"query-scheduler|loki-read-.*|$namespace-[0-9]*\"} > 0)",
@@ -536,7 +536,7 @@
-  "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/.*query-scheduler\"})",
+  "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/(.*query-scheduler|loki-read|loki-single-binary)\"})",
@@ -638,19 +638,19 @@
-  "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"querier\"}[$__rate_interval]))",
+  "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"querier|loki\", pod=~\"query-frontend|loki-read-.*|$namespace-[0-9]*\"}[$__rate_interval]))",
-  "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"querier\", resource=\"cpu\"} > 0)",
+  "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"querier|loki\", pod=~\"query-frontend|loki-read-.*|$namespace-[0-9]*\", resource=\"cpu\"} > 0)",
-  "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"querier\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"querier\"})",
+  "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"querier|loki\", pod=~\"query-frontend|loki-read-.*|$namespace-[0-9]*\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"querier|loki\", pod=~\"query-frontend|loki-read-.*|$namespace-[0-9]*\"})",
@@ -740,19 +740,19 @@
-  "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"querier\"})",
+  "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"querier|loki\", pod=~\"query-frontend|loki-read-.*|$namespace-[0-9]*\"})",
-  "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"querier\", resource=\"memory\"} > 0)",
+  "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"querier|loki\", pod=~\"query-frontend|loki-read-.*|$namespace-[0-9]*\", resource=\"memory\"} > 0)",
-  "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"querier\"} > 0)",
+  "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"querier|loki\", pod=~\"query-frontend|loki-read-.*|$namespace-[0-9]*\"} > 0)",
@@ -803,7 +803,7 @@
-  "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/.*querier\"})",
+  "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/(.*querier|loki-read|loki-single-binary)\"})",
@@ -854,7 +854,7 @@
-  "expr": "sum by(instance, pod, device) (rate(node_disk_written_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) (container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"querier\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 0)\n",
+  "expr": "sum by(instance, pod, device) (rate(node_disk_written_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) (container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-frontend|loki\", pod=~\"query-frontend|loki-read-.*|$namespace-[0-9]*\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 0)\n",
@@ -902,7 +902,7 @@
-  "expr": "sum by(instance, pod, device) (rate(node_disk_read_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) (container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"querier\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 0)\n",
+  "expr": "sum by(instance, pod, device) (rate(node_disk_read_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) (container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-frontend|loki\", pod=~\"query-frontend|loki-read-.*|$namespace-[0-9]*\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 0)\n",
@@ -1462,19 +1462,19 @@
-  "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"bloom-gateway\"}[$__rate_interval]))",
+  "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"bloom-gateway|loki\", pod=~\"query-frontend|loki-read-.*|$namespace-[0-9]*\"}[$__rate_interval]))",
-  "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"bloom-gateway\", resource=\"cpu\"} > 0)",
+  "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"bloom-gateway|loki\", pod=~\"query-frontend|loki-read-.*|$namespace-[0-9]*\", resource=\"cpu\"} > 0)",
-  "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"bloom-gateway\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"bloom-gateway\"})",
+  "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"bloom-gateway|loki\", pod=~\"query-frontend|loki-read-.*|$namespace-[0-9]*\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"bloom-gateway|loki\", pod=~\"query-frontend|loki-read-.*|$namespace-[0-9]*\"})",
@@ -1564,19 +1564,19 @@
-  "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"bloom-gateway\"})",
+  "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"bloom-gateway|loki\", pod=~\"query-frontend|loki-read-.*|$namespace-[0-9]*\"})",
-  "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"bloom-gateway\", resource=\"memory\"} > 0)",
+  "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"bloom-gateway|loki\", pod=~\"query-frontend|loki-read-.*|$namespace-[0-9]*\", resource=\"memory\"} > 0)",
-  "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"bloom-gateway\"} > 0)",
+  "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"bloom-gateway|loki\", pod=~\"query-frontend|loki-read-.*|$namespace-[0-9]*\"} > 0)",
@@ -1627,7 +1627,7 @@
-  "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/.*bloom-gateway\"})",
+  "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/(.*bloom-gateway|loki-read|loki-single-binary)\"})",
@@ -1678,7 +1678,7 @@
-  "expr": "sum by(instance, pod, device) (rate(node_disk_written_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) (container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"bloom-gateway\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 0)\n",
+  "expr": "sum by(instance, pod, device) (rate(node_disk_written_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) (container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"bloom-gateway|loki\", pod=~\"query-frontend|loki-read-.*|$namespace-[0-9]*\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 0)\n",
@@ -1726,7 +1726,7 @@
-  "expr": "sum by(instance, pod, device) (rate(node_disk_read_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) (container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"bloom-gateway\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 0)\n",
+  "expr": "sum by(instance, pod, device) (rate(node_disk_read_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) (container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"bloom-gateway|loki\", pod=~\"query-frontend|loki-read-.*|$namespace-[0-9]*\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 0)\n",
@@ -2189,19 +2189,19 @@
-  "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"ruler\"}[$__rate_interval]))",
+  "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"ruler|loki\", pod=~\"ruler|loki-backend-.*|$namespace-[0-9]*\"}[$__rate_interval]))",
-  "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"ruler\", resource=\"cpu\"} > 0)",
+  "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"ruler|loki\", pod=~\"ruler|loki-backend-.*|$namespace-[0-9]*\", resource=\"cpu\"} > 0)",
-  "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"ruler\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"ruler\"})",
+  "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"ruler|loki\", pod=~\"ruler|loki-backend-.*|$namespace-[0-9]*\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"ruler|loki\", pod=~\"ruler|loki-backend-.*|$namespace-[0-9]*\"})",
@@ -2291,19 +2291,19 @@
-  "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"ruler\"})",
+  "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"ruler|loki\", pod=~\"ruler|loki-backend-.*|$namespace-[0-9]*\"})",
-  "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"ruler\", resource=\"memory\"} > 0)",
+  "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"ruler|loki\", pod=~\"ruler|loki-backend-.*|$namespace-[0-9]*\", resource=\"memory\"} > 0)",
-  "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"ruler\"} > 0)",
+  "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"ruler|loki\", pod=~\"ruler|loki-backend-.*|$namespace-[0-9]*\"} > 0)",
@@ -2354,7 +2354,7 @@
-  "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/ruler\"})",
+  "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/(.*ruler|loki-backend|loki-single-binary)\"})",
@@ -2449,7 +2449,7 @@
-  "title": "TSBD Index",
+  "title": "TSDB Index",
@@ -2897,4 +2897,4 @@
   "title": "Loki / Reads",
   "uid": "reads",
   "version": 0
 }
@@ -104,7 +104,7 @@
-  "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"distributor\"}[$__rate_interval]))",
+  "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"distributor|loki\", pod=~\"distributor|loki-write-.*|$namespace-[0-9]*\"}[$__rate_interval]))",
@@ -116,7 +116,7 @@
-  "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"distributor\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"distributor\"})",
+  "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"distributor|loki\", pod=~\"distributor|loki-write-.*|$namespace-[0-9]*\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"distributor|loki\", pod=~\"distributor|loki-write-.*|$namespace-[0-9]*\"})",
@@ -206,7 +206,7 @@
-  "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"distributor\"})",
+  "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"distributor|loki\", pod=~\"distributor|loki-write-.*|$namespace-[0-9]*\"})",
@@ -218,7 +218,7 @@
-  "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"distributor\"} > 0)",
+  "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"distributor|loki\", pod=~\"distributor|loki-write-.*|$namespace-[0-9]*\"} > 0)",
@@ -269,7 +269,7 @@
-  "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/.*distributor\"})",
+  "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/(.*distributor|loki-write|loki-single-binary)\"})",
@ -120,9 +120,9 @@ data:
|
|||||||
replacement = "{{- .Values.clusterLabelValue -}}"
|
replacement = "{{- .Values.clusterLabelValue -}}"
|
||||||
}
|
}
|
||||||
rule {
|
rule {
|
||||||
source_labels = ["__meta_kubernetes_pod_container_port_number"]
|
source_labels = ["__meta_kubernetes_pod_container_port_name"]
|
||||||
action = "drop"
|
action = "keep"
|
||||||
regex = "9095"
|
regex = ".*metrics.*"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -135,6 +135,11 @@ data:
|
|||||||
}
|
}
|
||||||
|
|
||||||
prometheus.relabel "filter" {
|
prometheus.relabel "filter" {
|
||||||
|
rule {
|
||||||
|
target_label = "cluster"
|
||||||
|
replacement = "{{- .Values.clusterLabelValue -}}"
|
||||||
|
}
|
||||||
|
|
||||||
rule {
|
rule {
|
||||||
source_labels = ["__name__"]
|
source_labels = ["__name__"]
|
||||||
regex = "({{ include "agent.all_metrics" . }})"
|
regex = "({{ include "agent.all_metrics" . }})"
|
||||||
@@ -150,133 +155,7 @@ data:
 
 forward_to = [ {{ include "agent.prometheus_write_targets" . }} ]
 }
-{{- if .Values.kubeStateMetrics.enabled }}
-
-prometheus.scrape "kubeStateMetrics" {
-clustering {
-enabled = true
-}
-targets = [ { "__address__" = "{{ .Values.kubeStateMetrics.endpoint }}" } ]
-forward_to = [ prometheus.relabel.filter.receiver ]
-}
-{{- end }}
-
-// cAdvisor and Kubelet metrics
-// Based on https://github.com/Chewie/loutretelecom-manifests/blob/main/manifests/addons/monitoring/config.river
-discovery.kubernetes "all_nodes" {
-role = "node"
-namespaces {
-own_namespace = true
-names = [ {{ include "agent.namespaces" . }} ]
-}
-}
-
-discovery.relabel "all_nodes" {
-targets = discovery.kubernetes.all_nodes.targets
-rule {
-source_labels = ["__meta_kubernetes_node_name"]
-target_label = "node"
-}
-rule {
-source_labels = ["__meta_kubernetes_namespace"]
-target_label = "namespace"
-}
-rule {
-source_labels = ["__meta_kubernetes_pod_name"]
-target_label = "pod"
-}
-rule {
-source_labels = ["__meta_kubernetes_namespace", "__meta_kubernetes_pod_label_app_kubernetes_io_name", "__meta_kubernetes_pod_label_app_kubernetes_io_component"]
-separator = "/"
-regex = "(.*)/(.*)/(.*)"
-replacement = "${1}/${2}-${3}"
-target_label = "job"
-}
-rule {
-target_label = "cluster"
-replacement = "{{- .Values.clusterLabelValue -}}"
-}
-}
-
-prometheus.scrape "cadvisor" {
-clustering {
-enabled = true
-}
-targets = discovery.relabel.all_nodes.output
-forward_to = [ prometheus.relabel.filter.receiver ]
-
-metrics_path = "/metrics/cadvisor"
-scheme = "https"
-
-bearer_token_file = "/var/run/secrets/kubernetes.io/serviceaccount/token"
-tls_config {
-ca_file = "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt"
-}
-}
-
-prometheus.scrape "kubelet" {
-clustering {
-enabled = true
-}
-targets = discovery.relabel.all_nodes.output
-forward_to = [ prometheus.relabel.filter.receiver ]
-
-metrics_path = "/metrics"
-scheme = "https"
-
-bearer_token_file = "/var/run/secrets/kubernetes.io/serviceaccount/token"
-tls_config {
-ca_file = "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt"
-}
-}
-
-prometheus.exporter.unix "promexporter" {}
-
-prometheus.scrape "node_exporter" {
-clustering {
-enabled = true
-}
-targets = prometheus.exporter.unix.promexporter.targets
-forward_to = [prometheus.relabel.node_exporter.receiver]
-
-job_name = "node-exporter"
-}
-
-prometheus.relabel "node_exporter" {
-forward_to = [ prometheus.relabel.filter.receiver ]
-
-rule {
-replacement = env("HOSTNAME")
-target_label = "nodename"
-}
-rule {
-replacement = "node-exporter"
-target_label = "job"
-}
-rule {
-source_labels = ["__meta_kubernetes_node_name"]
-target_label = "node"
-}
-rule {
-source_labels = ["__meta_kubernetes_namespace"]
-target_label = "namespace"
-}
-rule {
-source_labels = ["__meta_kubernetes_pod_name"]
-target_label = "pod"
-}
-rule {
-source_labels = ["__meta_kubernetes_namespace", "__meta_kubernetes_pod_label_app_kubernetes_io_name", "__meta_kubernetes_pod_label_app_kubernetes_io_component"]
-separator = "/"
-regex = "(.*)/(.*)/(.*)"
-replacement = "${1}/${2}-${3}"
-target_label = "job"
-}
-rule {
-target_label = "cluster"
-replacement = "{{- .Values.clusterLabelValue -}}"
-}
-}
 {{- end }}
 
 {{- if or .Values.local.traces.enabled .Values.cloud.traces.enabled }}
@@ -330,7 +209,7 @@ data:
 {{- if .Values.local.logs.enabled }}
 loki.write "local" {
 endpoint {
-url = "http://{{- .Release.Namespace -}}-loki-gateway.{{- .Release.Namespace -}}.svc.cluster.local:80/loki/api/v1/push"
+url = "http://loki-write.{{- .Release.Namespace -}}.svc.cluster.local:3100/loki/api/v1/push"
 }
 }
 {{- end }}
@@ -346,7 +225,7 @@ data:
 {{- if .Values.local.traces.enabled }}
 otelcol.exporter.otlphttp "local" {
 client {
-endpoint = "http://{{- .Release.Name -}}-tempo-distributor.svc:4318"
+endpoint = "http://{{- .Release.Name -}}-tempo-distributor.{{- .Release.Namespace -}}.svc:4318"
 }
 }
 {{- end }}
@@ -34,10 +34,6 @@
 {{- end -}}
 {{- end -}}
-
-{{- if empty .Values.namespacesToMonitor -}}
-{{- fail "No namespaces have been specified in namespacesToMonitor" -}}
-{{- end -}}
 
 {{- if empty .Values.metrics.retain -}}
 {{- fail "All metrics will be collected, please specify some in metrics.retain" -}}
 {{- end -}}
@@ -1,8 +1,9 @@
 # Specify the namespaces to monitor here
-namespacesToMonitor:
-- loki
+# By default the chart will monitor the namespace it is installed in
+# namespacesToMonitor:
+# - loki
 # The name of the cluster where this will be installed
-clusterLabelValue: "meta-monitoring"
+clusterLabelValue: "meta"
 # Set to true to write logs, metrics or traces to Grafana Cloud
 # The secrets have to be created first
 cloud:
@@ -28,7 +29,7 @@ local:
 minio:
 enabled: false # This should be set to true if any of the previous is enabled
 grafana:
-version: 10.4.2
+version: 11.4.3
 # Gateway ingress configuration
 ingress:
 # -- Specifies whether an ingress for the gateway should be created
@@ -45,7 +46,12 @@ grafana:
 paths:
 - path: /
 # -- pathType (e.g. ImplementationSpecific, Prefix, .. etc.) might also be required by some Ingress Controllers
-# pathType: Prefix
+pathType: Prefix
+# backend:
+# service:
+# name: TODO
+# port:
+# number: TODO
 # -- TLS configuration for the gateway ingress. Hosts passed through the `tpl` function to allow templating
 #tls:
 # - secretName: grafana-tls
@@ -55,15 +61,17 @@ logs:
 # Adding regexes here will add a stage.replace block for logs. For more information see
 # https://grafana.com/docs/agent/latest/flow/reference/components/loki.process/#stagereplace-block
 piiRegexes: null # This example replaces the word after password with *****
 # - expression: "password (\\\\S+)"
 # source: "" # Empty uses the log message
 # replace: "*****""
 # The lines matching these will be kept in Loki
 retain:
 # This shows the queries
+- executing query
 - caller=metrics.go
 # This shows any errors
 - level=error
+- level=warn
 # Log lines for delete requests
 - delete request for user added
 - Started processing delete request
@@ -144,6 +152,7 @@ metrics:
 - kube_pod_container_resource_requests
 - kube_pod_container_status_last_terminated_reason
 - kube_pod_container_status_restarts_total
+- loki_azure_blob_request_duration_seconds_bucket
 - loki_boltdb_shipper_compact_tables_operation_duration_seconds
 - loki_boltdb_shipper_compact_tables_operation_last_successful_run_timestamp_seconds
 - loki_boltdb_shipper_retention_marker_count_total
@@ -169,11 +178,15 @@ metrics:
 - loki_compactor_deleted_lines
 - loki_compactor_oldest_pending_delete_request_age_seconds
 - loki_compactor_pending_delete_requests_count
+- loki_consul_request_duration_seconds_bucket
 - loki_discarded_samples_total
 - loki_discarded_bytes_total
 - loki_distributor_bytes_received_total
 - loki_distributor_lines_received_total
 - loki_distributor_structured_metadata_bytes_received_total
+- loki_gcs_request_duration_seconds_bucket
+- loki_gcs_request_duration_seconds_count
+- loki_index_request_duration_seconds_bucket
 - loki_index_request_duration_seconds_count
 - loki_ingester_chunk_age_seconds_bucket
 - loki_ingester_chunk_age_seconds_count
@@ -186,6 +199,7 @@ metrics:
 - loki_ingester_chunk_entries_sum
 - loki_ingester_chunk_size_bytes_bucket
 - loki_ingester_chunk_utilization_bucket
+- loki_ingester_chunk_utilization_count
 - loki_ingester_chunk_utilization_sum
 - loki_ingester_chunks_flushed_total
 - loki_ingester_flush_queue_length
@@ -203,6 +217,8 @@ metrics:
 - loki_ruler_wal_prometheus_remote_storage_samples_total
 - loki_ruler_wal_samples_appended_total
 - loki_ruler_wal_storage_created_series_total
+- loki_s3_request_duration_seconds_bucket
+- loki_s3_request_duration_seconds_count
 - loki_write_batch_retries_total
 - loki_write_dropped_bytes_total
 - loki_write_dropped_entries_total
@@ -1,8 +1,12 @@
 # Update the dependencies
 
-The dependencies are the version of Loki, Mimir, Agent and so on that are included in this chart.
+The dependencies are the versions of Loki, Mimir, Agent and so on that are included in this chart.
 The current versions can be found in the [Chart.yaml](../charts/meta-monitoring/Chart.yaml) file.
 
+A Github action runs daily to see if updated versions are available. A PR will be created.
+
+The manual steps are as follows:
+
 Run this in the charts/meta-monitoring directory after updating a dependency:
 
 ```
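The fenced command block referenced in this hunk is truncated here. A sketch of the usual Helm workflow for refreshing packaged subcharts after a version bump follows; the exact command is an assumption and is not taken from the diff itself.

```
# Assumed workflow (not shown in the hunk above): re-resolve the dependencies
# declared in Chart.yaml and refresh Chart.lock.
cd charts/meta-monitoring
helm dependency update
```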
@@ -4,7 +4,7 @@
 
 1. Use an existing Grafana Cloud account or setup a new one. Then create an access token:
 
-1. In Grafana go to Administration -> Users and Access -> Cloud access policies.
+1. In a Grafana instance on Grafana Cloud go to Administration -> Users and Access -> Cloud access policies.
 
 1. Click `Create access policy`.
 
@@ -39,7 +39,7 @@
 --from-literal=endpoint='https://otlp-gateway-prod-us-east-0.grafana.net/otlp'
 ```
 
-The logs, metrics and traces usernames are the `User / Username / Instance IDs` of the Loki, Prometheus/Mimir and OpenTelemetry instances in Grafana Cloud. From `Home` in Grafana click on `Stacks`. Then go to the `Details` pages of Loki and Prometheus/Mimir. For OpenTelemetry go to the `Configure` page.
+The logs, metrics and traces usernames are the `User / Username / Instance IDs` of the Loki, Prometheus/Mimir and OpenTelemetry instances in Grafana Cloud. From `Home` in Grafana click on `Stacks`. Then go to the `Details` pages of Loki and Prometheus/Mimir. For OpenTelemetry go to the `Configure` page. The endpoints will also have to be changed to match your settings.
 
 1. Create a values.yaml file based on the [default one](../charts/meta-monitoring/values.yaml). Fill in the names of the secrets created above as needed. An example minimal values.yaml looks like this:
 
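The `--from-literal=endpoint=...` context line above is the tail of a secret-creation command. A hypothetical full command is sketched below; the secret name, namespace, key names and placeholder values are assumptions, and only the endpoint literal comes from the hunk above.

```
# Hypothetical sketch: secret name, namespace and key names are assumptions.
kubectl create secret generic traces -n meta \
  --from-literal=username='<instance id>' \
  --from-literal=password='<access token>' \
  --from-literal=endpoint='https://otlp-gateway-prod-us-east-0.grafana.net/otlp'
```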
@@ -91,7 +91,7 @@
 
 local:
 grafana:
-enabled:true
+enabled: true
 logs:
 enabled: true
 metrics:
@@ -102,7 +102,7 @@
 enabled: true
 ```
 
-## Installing the chart
+## Installing, updating and deleting the chart
 
 1. Add the repo
 
@@ -175,7 +175,7 @@ For each of the dashboard files in charts/meta-monitoring/src/dashboards folder
 
 ## Configure Loki to send traces
 
-1. In the Loki config enable tracing:
+1. In the Loki that is being monitored enable tracing in the config:
 
 ```
 loki:
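The fenced config beginning with `loki:` is cut off in this hunk. Assuming Loki's standard `tracing` block, enabling tracing in the monitored Loki would look roughly like the sketch below; the key layout is an assumption, not taken from the diff.

```
# Assumed shape of the truncated snippet: enable tracing in the monitored Loki.
loki:
  tracing:
    enabled: true
```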
@@ -187,7 +187,15 @@ For each of the dashboard files in charts/meta-monitoring/src/dashboards folder
 
 1. JAEGER_ENDPOINT: http address of the mmc-alloy service installed by the meta-monitoring chart, for example "http://mmc-alloy:14268/api/traces"
 1. JAEGER_AGENT_TAGS: extra tags you would like to add to the spans, for example 'cluster="abc",namespace="def"'
-1. JAEGER_SAMPLER_TYPE: the sampling strategy, for example to sample all use 'const' with a value of 1 for the next environment variable
-1. JAEGER_SAMPLER_PARAM: 1
+1. JAEGER_SAMPLER_TYPE: the sampling strategy, we suggest setting this to `ratelimiting` so at most 1 trace is accepted per second. See these [docs](https://www.jaegertracing.io/docs/1.57/sampling/) for more options.
+1. JAEGER_SAMPLER_PARAM: 1.0
 
 1. If Loki is installed in a different namespace you can create an [ExternalName service](https://kubernetes.io/docs/concepts/services-networking/service/#externalname) in Kubernetes to point to the mmc-alloy service in the meta monitoring namespace
+
+## Configure external access using an Ingress in local mode
+
+When using local mode by default a Kubernetes [Ingress](https://kubernetes.io/docs/concepts/services-networking/ingress/) object is created to access the Grafana instance. This will need to be adapted to your cloud provider by updating the `grafana.ingress` section of the `values.yaml` file provided to Helm. Check the documentation of your cloud provider for available options.
+
+## Kube-state-metrics
+
+Metrics about Kubernetes objects are scraped from [kube-state-metrics](https://github.com/kubernetes/kube-state-metrics). This needs to be installed in the cluster. The `kubeStateMetrics.endpoint` entry in values.yaml should be set to it's address (without the `/metrics` part in the URL).
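A sketch of the ExternalName service mentioned in this hunk, for the case where the monitored Loki runs in a different namespace than the meta-monitoring chart. All names are assumptions: `loki` stands for the monitored namespace and `meta` for the meta-monitoring namespace.

```
# Hypothetical manifest: the namespaces and service name are assumptions.
apiVersion: v1
kind: Service
metadata:
  name: mmc-alloy
  namespace: loki   # namespace of the monitored Loki (assumption)
spec:
  type: ExternalName
  externalName: mmc-alloy.meta.svc.cluster.local   # mmc-alloy service in the meta-monitoring namespace
```

With such a service in place, the monitored Loki can keep using `http://mmc-alloy:14268/api/traces` as its JAEGER_ENDPOINT.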
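For the kube-state-metrics note above, the `kubeStateMetrics.endpoint` entry lives in values.yaml; the address shown below is an assumption for a typical kube-state-metrics install in the kube-system namespace.

```
# values.yaml fragment; the address is an assumption, adjust to your install.
kubeStateMetrics:
  endpoint: kube-state-metrics.kube-system.svc.cluster.local:8080
```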