forked from RemoteSync/grafana-meta-monitoring-chart
Compare commits
49 Commits
fix_traces
...
test_branc
Author | SHA1 | Date | |
---|---|---|---|
|
cb31d42f57 | ||
|
e6db102da8 | ||
|
1a33ef0d2b | ||
|
0e95fcc5cb | ||
|
f5b0477a2d | ||
|
2939c3cd63 | ||
|
76c8884a3c | ||
|
edc556b074 | ||
|
e5e13ac517 | ||
|
8b9ed3c9f7 | ||
|
844708681f | ||
|
ce216cd558 | ||
|
0418d16a1b | ||
|
8cff0e0e75 | ||
|
65995dce4f | ||
|
4d42fb664d | ||
|
9457c25ced | ||
|
ca686afc3e | ||
|
4b01214225 | ||
|
0e63a86fe5 | ||
|
4e8b2be044 | ||
|
df12d96f9c | ||
|
fcb5de6793 | ||
|
661662caec | ||
|
2a681ce1eb | ||
|
52e4516e04 | ||
|
95085c4e72 | ||
|
55d3c9d723 | ||
|
618ab3778b | ||
|
89d9bdb5e2 | ||
|
291f680c16 | ||
|
3658769c7a | ||
|
1be9bc8d0a | ||
|
81d63a4383 | ||
|
333ba3a3fd | ||
|
7aa091cbf8 | ||
|
d309a5bc50 | ||
|
346dd4968e | ||
|
f5c9fa0593 | ||
|
d5e8df856d | ||
|
2d85e7e120 | ||
|
1a4a1ad885 | ||
|
c1ff364c29 | ||
|
bd0ef0e2cc | ||
|
0216163885 | ||
|
c42718649f | ||
|
650df8217a | ||
|
f7946ff713 | ||
|
b312fc37fc |
19
.github/configs/cluster-config.yaml
vendored
Normal file
19
.github/configs/cluster-config.yaml
vendored
Normal file
@@ -0,0 +1,19 @@
|
|||||||
|
apiVersion: kind.x-k8s.io/v1alpha4
|
||||||
|
kind: Cluster
|
||||||
|
nodes:
|
||||||
|
- role: control-plane
|
||||||
|
kubeadmConfigPatches:
|
||||||
|
- |
|
||||||
|
kind: ClusterConfiguration
|
||||||
|
controllerManager:
|
||||||
|
extraArgs:
|
||||||
|
bind-address: 0.0.0.0
|
||||||
|
secure-port: "10257"
|
||||||
|
scheduler:
|
||||||
|
extraArgs:
|
||||||
|
bind-address: 0.0.0.0
|
||||||
|
secure-port: "10259"
|
||||||
|
- |
|
||||||
|
kind: KubeProxyConfiguration
|
||||||
|
metricsBindAddress: 0.0.0.0:10249
|
||||||
|
- role: worker
|
77
.github/workflows/helm-ci.yml
vendored
77
.github/workflows/helm-ci.yml
vendored
@@ -1,6 +1,7 @@
|
|||||||
---
|
---
|
||||||
name: helm-ci
|
name: helm-ci
|
||||||
on:
|
on:
|
||||||
|
workflow_dispatch:
|
||||||
pull_request:
|
pull_request:
|
||||||
paths:
|
paths:
|
||||||
- "charts/meta-monitoring/**"
|
- "charts/meta-monitoring/**"
|
||||||
@@ -19,48 +20,48 @@ jobs:
|
|||||||
- name: Lint Yaml
|
- name: Lint Yaml
|
||||||
run: make helm-lint
|
run: make helm-lint
|
||||||
|
|
||||||
# call-test:
|
call-test:
|
||||||
# name: Test Helm Chart
|
name: Test Helm Chart
|
||||||
# runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
# steps:
|
steps:
|
||||||
# - name: Checkout
|
- name: Checkout
|
||||||
# uses: actions/checkout@v3
|
uses: actions/checkout@v4
|
||||||
# with:
|
with:
|
||||||
# fetch-depth: 0
|
fetch-depth: 0
|
||||||
|
|
||||||
# - name: Set up Helm
|
- name: Set up Helm
|
||||||
# uses: azure/setup-helm@v3
|
uses: azure/setup-helm@v3
|
||||||
# with:
|
with:
|
||||||
# version: v3.8.2
|
version: v3.14.0
|
||||||
|
|
||||||
# # Python is required because `ct lint` runs Yamale (https://github.com/23andMe/Yamale) and
|
# Python is required because `ct lint` runs Yamale (https://github.com/23andMe/Yamale) and
|
||||||
# # yamllint (https://github.com/adrienverge/yamllint) which require Python
|
# yamllint (https://github.com/adrienverge/yamllint) which require Python
|
||||||
# - name: Set up Python
|
- name: Set up Python
|
||||||
# uses: actions/setup-python@v4
|
uses: actions/setup-python@v4
|
||||||
# with:
|
with:
|
||||||
# python-version: 3.7
|
python-version: 3.9
|
||||||
|
|
||||||
# - name: Set up chart-testing
|
- name: Set up chart-testing
|
||||||
# uses: helm/chart-testing-action@v2.4.0
|
uses: helm/chart-testing-action@v2
|
||||||
|
|
||||||
# - name: Run chart-testing (list-changed)
|
- name: Run chart-testing (list-changed)
|
||||||
# id: list-changed
|
id: list-changed
|
||||||
# run: |
|
run: |
|
||||||
# changed=$(ct list-changed --config "${CT_CONFIGFILE}")
|
changed=$(ct list-changed --config "${CT_CONFIGFILE}")
|
||||||
# if [[ -n "$changed" ]]; then
|
if [[ -n "$changed" ]]; then
|
||||||
# echo "changed=true" >> $GITHUB_OUTPUT
|
echo "changed=true" >> $GITHUB_OUTPUT
|
||||||
# fi
|
fi
|
||||||
|
|
||||||
# - name: Run chart-testing (lint)
|
- name: Run chart-testing (lint)
|
||||||
# run: ct lint --config "${CT_CONFIGFILE}" --check-version-increment=false
|
run: ct lint --config "${CT_CONFIGFILE}" --check-version-increment=false
|
||||||
|
|
||||||
# - name: Create kind cluster
|
- name: Create kind cluster
|
||||||
# uses: helm/kind-action@v1.8.0
|
uses: helm/kind-action@v1
|
||||||
# if: steps.list-changed.outputs.changed == 'true'
|
if: steps.list-changed.outputs.changed == 'true'
|
||||||
# with:
|
with:
|
||||||
# config: tools/kind.config
|
config: "${{ github.workspace }}/.github/configs/cluster-config.yaml"
|
||||||
|
|
||||||
# - name: Run chart-testing (install)
|
- name: Run chart-testing (install)
|
||||||
# run: |
|
run: |
|
||||||
# changed=$(ct list-changed --config "${CT_CONFIGFILE}")
|
changed=$(ct list-changed --config "${CT_CONFIGFILE}")
|
||||||
# ct install --config "${CT_CONFIGFILE}"
|
ct install --config "${CT_CONFIGFILE}"
|
||||||
|
16
README.md
16
README.md
@@ -1,8 +1,6 @@
|
|||||||
# meta-monitoring-chart
|
# meta-monitoring-chart
|
||||||
|
|
||||||
This is a meta-monitoring chart for Loki.
|
This is a meta-monitoring chart for Loki, specifically for Loki installed via the Loki Helm chart.
|
||||||
|
|
||||||
Note that this is pre-production software at the moment.
|
|
||||||
|
|
||||||
## Local and cloud modes
|
## Local and cloud modes
|
||||||
|
|
||||||
@@ -11,19 +9,15 @@ to small Loki, Mimir and Tempo installations running in the meta-monitoring name
|
|||||||
|
|
||||||

|

|
||||||
|
|
||||||
To enable local mode set `local.<logs|metrics|traces>.enabled` to true.
|
|
||||||
|
|
||||||
In the cloud mode the logs, metrics and/or traces are sent to Grafana Cloud.
|
In the cloud mode the logs, metrics and/or traces are sent to Grafana Cloud.
|
||||||
|
|
||||||

|

|
||||||
|
|
||||||
To enable cloud mode set `cloud.<logs|metrics|traces>.enabled` to true. The `endpoint`, `username` and `password` settings for your Grafana Cloud logs, metrics and traces instances have to be filled in as well.
|
|
||||||
|
|
||||||
Both modes can be enabled at the same time. Cloud mode is preferred.
|
Both modes can be enabled at the same time. Cloud mode is preferred.
|
||||||
|
|
||||||
## Installation
|
## Installation
|
||||||
|
|
||||||
For more instructions including how to update the chart go to the [installation](docs/installation.md) page.
|
For more instructions, including how to install the chart, go to the [installation](docs/installation.md) page.
|
||||||
|
|
||||||
## Supported features
|
## Supported features
|
||||||
|
|
||||||
@@ -33,8 +27,7 @@ For more instructions including how to update the chart go to the [installation]
|
|||||||
- Specify PII regexes that are applied to logs before they are sent to Loki (cloud or local). The capture group in the regex is replaced with `*****`.
|
- Specify PII regexes that are applied to logs before they are sent to Loki (cloud or local). The capture group in the regex is replaced with `*****`.
|
||||||
- a Grafana instance is installed (when local mode is used) with the relevant datasources installed. The following dashboards are installed:
|
- a Grafana instance is installed (when local mode is used) with the relevant datasources installed. The following dashboards are installed:
|
||||||
- logs dashboards
|
- logs dashboards
|
||||||
- agent dashboards
|
- Alloy dashboards
|
||||||
- Retention is set to 24 hours
|
|
||||||
|
|
||||||
Most of these features are enabled by default. See the values.yaml file for how to enable/disable them.
|
Most of these features are enabled by default. See the values.yaml file for how to enable/disable them.
|
||||||
|
|
||||||
@@ -42,8 +35,7 @@ Most of these features are enabled by default. See the values.yaml file for how
|
|||||||
|
|
||||||
- This has not been tested on OpenShift yet.
|
- This has not been tested on OpenShift yet.
|
||||||
- The underlying Loki, Mimir and Tempo are at the default size installed by the Helm chart. This might need changing when monitoring bigger Loki, Mimir or Tempo installations.
|
- The underlying Loki, Mimir and Tempo are at the default size installed by the Helm chart. This might need changing when monitoring bigger Loki, Mimir or Tempo installations.
|
||||||
- MinIO is used as storage at the moment with a limited retention. At the moment this chart cannot be used for monitoring over longer periods.
|
- MinIO is currently used as storage for the local mode, with limited retention, so this chart cannot yet be used for monitoring over longer periods.
|
||||||
- Agent self monitoring is not done at the moment.
|
|
||||||
|
|
||||||
## Developer help topics
|
## Developer help topics
|
||||||
|
|
||||||
|
@@ -1,10 +1,10 @@
|
|||||||
dependencies:
|
dependencies:
|
||||||
- name: loki
|
- name: loki
|
||||||
repository: https://grafana.github.io/helm-charts
|
repository: https://grafana.github.io/helm-charts
|
||||||
version: 6.5.1
|
version: 6.5.2
|
||||||
- name: alloy
|
- name: alloy
|
||||||
repository: https://grafana.github.io/helm-charts
|
repository: https://grafana.github.io/helm-charts
|
||||||
version: 0.1.1
|
version: 0.3.0
|
||||||
- name: mimir-distributed
|
- name: mimir-distributed
|
||||||
repository: https://grafana.github.io/helm-charts
|
repository: https://grafana.github.io/helm-charts
|
||||||
version: 5.3.0
|
version: 5.3.0
|
||||||
@@ -14,5 +14,5 @@ dependencies:
|
|||||||
- name: minio
|
- name: minio
|
||||||
repository: https://charts.min.io
|
repository: https://charts.min.io
|
||||||
version: 5.2.0
|
version: 5.2.0
|
||||||
digest: sha256:e0c7af6d328fe35f4b9a3557235f458d92225b84b1366dbb77c4626d3cdb5be9
|
digest: sha256:0eaa504de24724505fa4fff5169cd86628465ec366c253392c4ed24f15902b6b
|
||||||
generated: "2024-05-09T07:02:42.911579524Z"
|
generated: "2024-05-22T07:02:54.054326052Z"
|
||||||
|
@@ -13,7 +13,7 @@ type: application
|
|||||||
# This is the chart version. This version number should be incremented each time you make changes
|
# This is the chart version. This version number should be incremented each time you make changes
|
||||||
# to the chart and its templates, including the app version.
|
# to the chart and its templates, including the app version.
|
||||||
# Versions are expected to follow Semantic Versioning (https://semver.org/)
|
# Versions are expected to follow Semantic Versioning (https://semver.org/)
|
||||||
version: 0.0.3
|
version: 1.0.0
|
||||||
# This is the version number of the application being deployed. This version number should be
|
# This is the version number of the application being deployed. This version number should be
|
||||||
# incremented each time you make changes to the application. Versions are not expected to
|
# incremented each time you make changes to the application. Versions are not expected to
|
||||||
# follow Semantic Versioning. They should reflect the version the application is using.
|
# follow Semantic Versioning. They should reflect the version the application is using.
|
||||||
@@ -22,11 +22,11 @@ appVersion: "0.0.1"
|
|||||||
dependencies:
|
dependencies:
|
||||||
- name: loki
|
- name: loki
|
||||||
repository: https://grafana.github.io/helm-charts
|
repository: https://grafana.github.io/helm-charts
|
||||||
version: 6.5.1
|
version: 6.5.2
|
||||||
condition: local.logs.enabled
|
condition: local.logs.enabled
|
||||||
- name: alloy
|
- name: alloy
|
||||||
repository: https://grafana.github.io/helm-charts
|
repository: https://grafana.github.io/helm-charts
|
||||||
version: 0.1.1
|
version: 0.3.0
|
||||||
- name: mimir-distributed
|
- name: mimir-distributed
|
||||||
repository: https://grafana.github.io/helm-charts
|
repository: https://grafana.github.io/helm-charts
|
||||||
version: 5.3.0
|
version: 5.3.0
|
||||||
|
Binary file not shown.
BIN
charts/meta-monitoring/charts/alloy-0.3.0.tgz
Normal file
BIN
charts/meta-monitoring/charts/alloy-0.3.0.tgz
Normal file
Binary file not shown.
Binary file not shown.
BIN
charts/meta-monitoring/charts/loki-6.5.2.tgz
Normal file
BIN
charts/meta-monitoring/charts/loki-6.5.2.tgz
Normal file
Binary file not shown.
116
charts/meta-monitoring/ci/local-values.yaml
Normal file
116
charts/meta-monitoring/ci/local-values.yaml
Normal file
@@ -0,0 +1,116 @@
|
|||||||
|
namespacesToMonitor:
|
||||||
|
- loki
|
||||||
|
|
||||||
|
local:
|
||||||
|
grafana:
|
||||||
|
enabled: true
|
||||||
|
logs:
|
||||||
|
enabled: true
|
||||||
|
metrics:
|
||||||
|
enabled: true
|
||||||
|
traces:
|
||||||
|
enabled: true
|
||||||
|
minio:
|
||||||
|
enabled: true
|
||||||
|
createSecret: false
|
||||||
|
|
||||||
|
cloud:
|
||||||
|
logs:
|
||||||
|
enabled: false
|
||||||
|
secret: logs
|
||||||
|
metrics:
|
||||||
|
enabled: false
|
||||||
|
secret: metrics
|
||||||
|
traces:
|
||||||
|
enabled: false
|
||||||
|
secret: traces
|
||||||
|
|
||||||
|
grafana:
|
||||||
|
ingress:
|
||||||
|
hosts:
|
||||||
|
- host: monitoring.example.com
|
||||||
|
paths:
|
||||||
|
- path: /
|
||||||
|
pathType: Prefix
|
||||||
|
|
||||||
|
minio:
|
||||||
|
existingSecret: ""
|
||||||
|
rootUser: "abcdefghi"
|
||||||
|
rootPassword: "defghijkl"
|
||||||
|
|
||||||
|
loki:
|
||||||
|
deploymentMode: SingleBinary
|
||||||
|
singleBinary:
|
||||||
|
replicas: 1
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
cpu: 3
|
||||||
|
memory: 4Gi
|
||||||
|
requests:
|
||||||
|
cpu: 2
|
||||||
|
memory: 2Gi
|
||||||
|
extraEnv:
|
||||||
|
# Keep a little bit lower than memory limits
|
||||||
|
- name: GOMEMLIMIT
|
||||||
|
value: 3750MiB
|
||||||
|
|
||||||
|
chunksCache:
|
||||||
|
# default is 500MB, with limited memory keep this smaller
|
||||||
|
writebackSizeLimit: 10MB
|
||||||
|
|
||||||
|
# Zero out replica counts of other deployment modes
|
||||||
|
backend:
|
||||||
|
replicas: 0
|
||||||
|
read:
|
||||||
|
replicas: 0
|
||||||
|
write:
|
||||||
|
replicas: 0
|
||||||
|
|
||||||
|
ingester:
|
||||||
|
replicas: 0
|
||||||
|
querier:
|
||||||
|
replicas: 0
|
||||||
|
queryFrontend:
|
||||||
|
replicas: 0
|
||||||
|
queryScheduler:
|
||||||
|
replicas: 0
|
||||||
|
distributor:
|
||||||
|
replicas: 0
|
||||||
|
compactor:
|
||||||
|
replicas: 0
|
||||||
|
indexGateway:
|
||||||
|
replicas: 0
|
||||||
|
bloomCompactor:
|
||||||
|
replicas: 0
|
||||||
|
bloomGateway:
|
||||||
|
replicas: 0
|
||||||
|
|
||||||
|
mimir-distributed:
|
||||||
|
minio:
|
||||||
|
enabled: false
|
||||||
|
global:
|
||||||
|
extraEnvFrom:
|
||||||
|
- secretRef:
|
||||||
|
name: "meta-minio"
|
||||||
|
|
||||||
|
tempo-distributed:
|
||||||
|
distributor:
|
||||||
|
extraEnvFrom:
|
||||||
|
- secretRef:
|
||||||
|
name: "meta-minio"
|
||||||
|
ingester:
|
||||||
|
extraEnvFrom:
|
||||||
|
- secretRef:
|
||||||
|
name: "meta-minio"
|
||||||
|
compactor:
|
||||||
|
extraEnvFrom:
|
||||||
|
- secretRef:
|
||||||
|
name: "meta-minio"
|
||||||
|
querier:
|
||||||
|
extraEnvFrom:
|
||||||
|
- secretRef:
|
||||||
|
name: "meta-minio"
|
||||||
|
queryFrontend:
|
||||||
|
extraEnvFrom:
|
||||||
|
- secretRef:
|
||||||
|
name: "meta-minio"
|
@@ -6,6 +6,5 @@ chart-dirs:
|
|||||||
chart-repos:
|
chart-repos:
|
||||||
- grafana=https://grafana.github.io/helm-charts
|
- grafana=https://grafana.github.io/helm-charts
|
||||||
- minio=https://charts.min.io
|
- minio=https://charts.min.io
|
||||||
helm-extra-args: --timeout 1200s
|
|
||||||
check-version-increment: false
|
check-version-increment: false
|
||||||
validate-maintainers: false
|
validate-maintainers: false
|
||||||
|
@@ -1824,7 +1824,7 @@
|
|||||||
"steppedLine": false,
|
"steppedLine": false,
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", pod=~\"(.*distributor.*|(loki|enterprise-logs)-write)\"}[$__rate_interval]))",
|
"expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", pod=~\"(.*distributor.*|(loki|enterprise-logs)-write.*|$namespace-[0-9]+)\"}[$__rate_interval]))",
|
||||||
"intervalFactor": 3,
|
"intervalFactor": 3,
|
||||||
"legendFormat": "{{pod}}",
|
"legendFormat": "{{pod}}",
|
||||||
"refId": "A"
|
"refId": "A"
|
||||||
@@ -1921,7 +1921,7 @@
|
|||||||
"steppedLine": false,
|
"steppedLine": false,
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "go_memstats_heap_inuse_bytes{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"(.*/distributor|(loki|enterprise-logs)-write|.*/loki)\"}",
|
"expr": "go_memstats_heap_inuse_bytes{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"(.*/.*distributor|$namespace/(loki|enterprise-logs)-write|.*/loki|$namespace/loki-single-binary)\"}",
|
||||||
"instant": false,
|
"instant": false,
|
||||||
"intervalFactor": 3,
|
"intervalFactor": 3,
|
||||||
"legendFormat": "{{pod}}",
|
"legendFormat": "{{pod}}",
|
||||||
@@ -2525,7 +2525,7 @@
|
|||||||
"steppedLine": false,
|
"steppedLine": false,
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", pod=~\"(.*ingester.*|(loki|enterprise-logs)-write|loki-single-binary)\"}[$__rate_interval]))",
|
"expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", pod=~\"(.*ingester.*|(loki|enterprise-logs)-write.*|loki-single-binary|$namespace-[0-9]+)\"}[$__rate_interval]))",
|
||||||
"intervalFactor": 3,
|
"intervalFactor": 3,
|
||||||
"legendFormat": "{{pod}}",
|
"legendFormat": "{{pod}}",
|
||||||
"refId": "A"
|
"refId": "A"
|
||||||
@@ -2622,7 +2622,7 @@
|
|||||||
"steppedLine": false,
|
"steppedLine": false,
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "go_memstats_heap_inuse_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"(.*ingester.*|(loki|enterprise-logs)-write|loki-single-binary)\"}",
|
"expr": "go_memstats_heap_inuse_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"(.*ingester.*|(loki|enterprise-logs)-write.*|loki-single-binary|$namespace-[0-9]+)\"}",
|
||||||
"instant": false,
|
"instant": false,
|
||||||
"intervalFactor": 3,
|
"intervalFactor": 3,
|
||||||
"legendFormat": "{{pod}}",
|
"legendFormat": "{{pod}}",
|
||||||
@@ -3308,7 +3308,7 @@
|
|||||||
"steppedLine": false,
|
"steppedLine": false,
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "sum by(reason) (rate(loki_ingester_chunks_flushed_total{cluster=~\"$cluster\",job=~\"$namespace/.*ingester.*\", namespace=~\"$namespace\"}[$__rate_interval])) / ignoring(reason) group_left sum(rate(loki_ingester_chunks_flushed_total{cluster=~\"$cluster\",job=~\"$namespace/.*ingester.*\", namespace=~\"$namespace\"}[$__rate_interval]))",
|
"expr": "sum by(reason) (rate(loki_ingester_chunks_flushed_total{cluster=~\"$cluster\",job=~\"($namespace)/(.*ingester.*|(loki|enterprise-logs)-write|loki-single-binary)\", namespace=~\"$namespace\"}[$__rate_interval])) / ignoring(reason) group_left sum(rate(loki_ingester_chunks_flushed_total{cluster=~\"$cluster\",job=~\"($namespace)/(.*ingester.*|(loki|enterprise-logs)-write|loki-single-binary)\", namespace=~\"$namespace\"}[$__rate_interval]))",
|
||||||
"interval": "",
|
"interval": "",
|
||||||
"legendFormat": "{{ reason }}"
|
"legendFormat": "{{ reason }}"
|
||||||
}
|
}
|
||||||
@@ -3388,7 +3388,7 @@
|
|||||||
"reverseYBuckets": false,
|
"reverseYBuckets": false,
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "sum by (le) (rate(loki_ingester_chunk_utilization_bucket{cluster=\"$cluster\", job=~\"($namespace)/(ingester|(loki|enterprise-logs)-write|loki-single-binary)\"}[$__rate_interval]))",
|
"expr": "sum by (le) (rate(loki_ingester_chunk_utilization_bucket{cluster=\"$cluster\", job=~\"($namespace)/(.*ingester|(loki|enterprise-logs)-write|loki-single-binary)\"}[$__rate_interval]))",
|
||||||
"format": "heatmap",
|
"format": "heatmap",
|
||||||
"instant": false,
|
"instant": false,
|
||||||
"interval": "",
|
"interval": "",
|
||||||
@@ -3481,7 +3481,7 @@
|
|||||||
"steppedLine": false,
|
"steppedLine": false,
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", pod=~\"(.*querier.*|(loki|enterprise-logs)-read|loki-single-binary)\"}[$__rate_interval]))",
|
"expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", pod=~\"(.*querier.*|(loki|enterprise-logs)-read.*|loki-single-binary|$namespace-[0-9]+)\"}[$__rate_interval]))",
|
||||||
"intervalFactor": 3,
|
"intervalFactor": 3,
|
||||||
"legendFormat": "{{pod}}",
|
"legendFormat": "{{pod}}",
|
||||||
"refId": "A"
|
"refId": "A"
|
||||||
@@ -3578,7 +3578,7 @@
|
|||||||
"steppedLine": false,
|
"steppedLine": false,
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "go_memstats_heap_inuse_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"(.*querier.*|(loki|enterprise-logs)-read|.*loki-single-binary)\"}",
|
"expr": "go_memstats_heap_inuse_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"(.*querier.*|(loki|enterprise-logs)-read.*|.*loki-single-binary|$namespace-[0-9]+)\"}",
|
||||||
"instant": false,
|
"instant": false,
|
||||||
"intervalFactor": 3,
|
"intervalFactor": 3,
|
||||||
"legendFormat": "{{pod}}",
|
"legendFormat": "{{pod}}",
|
||||||
|
@@ -104,19 +104,19 @@
|
|||||||
"span": 4,
|
"span": 4,
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-frontend\"}[$__rate_interval]))",
|
"expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-frontend|loki\", pod=~\"query-frontend|loki-read-.*|$namespace-[0-9]*\"}[$__rate_interval]))",
|
||||||
"format": "time_series",
|
"format": "time_series",
|
||||||
"legendFormat": "{{pod}}",
|
"legendFormat": "{{pod}}",
|
||||||
"legendLink": null
|
"legendLink": null
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-frontend\", resource=\"cpu\"} > 0)",
|
"expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-frontend|loki\", pod=~\"query-frontend|loki-read-.*|$namespace-[0-9]*\", resource=\"cpu\"} > 0)",
|
||||||
"format": "time_series",
|
"format": "time_series",
|
||||||
"legendFormat": "request",
|
"legendFormat": "request",
|
||||||
"legendLink": null
|
"legendLink": null
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-frontend\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-frontend\"})",
|
"expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-frontend|loki\", pod=~\"query-frontend|loki-read-.*|$namespace-[0-9]*\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-frontend|loki\", pod=~\"query-frontend|loki-read-.*|$namespace-[0-9]*\"})",
|
||||||
"format": "time_series",
|
"format": "time_series",
|
||||||
"legendFormat": "limit",
|
"legendFormat": "limit",
|
||||||
"legendLink": null
|
"legendLink": null
|
||||||
@@ -206,19 +206,19 @@
|
|||||||
"span": 4,
|
"span": 4,
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-frontend\"})",
|
"expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-frontend|loki\", pod=~\"query-frontend|loki-read-.*|$namespace-[0-9]*\"})",
|
||||||
"format": "time_series",
|
"format": "time_series",
|
||||||
"legendFormat": "{{pod}}",
|
"legendFormat": "{{pod}}",
|
||||||
"legendLink": null
|
"legendLink": null
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-frontend\", resource=\"memory\"} > 0)",
|
"expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-frontend|loki\", pod=~\"query-frontend|loki-read-.*|$namespace-[0-9]*\", resource=\"memory\"} > 0)",
|
||||||
"format": "time_series",
|
"format": "time_series",
|
||||||
"legendFormat": "request",
|
"legendFormat": "request",
|
||||||
"legendLink": null
|
"legendLink": null
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-frontend\"} > 0)",
|
"expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-frontend|loki\", pod=~\"query-frontend|loki-read-.*|$namespace-[0-9]*\"} > 0)",
|
||||||
"format": "time_series",
|
"format": "time_series",
|
||||||
"legendFormat": "limit",
|
"legendFormat": "limit",
|
||||||
"legendLink": null
|
"legendLink": null
|
||||||
@@ -269,7 +269,7 @@
|
|||||||
"span": 4,
|
"span": 4,
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/.*query-frontend\"})",
|
"expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/(.*query-frontend|loki-read|loki-single-binary)\"})",
|
||||||
"format": "time_series",
|
"format": "time_series",
|
||||||
"legendFormat": "{{pod}}",
|
"legendFormat": "{{pod}}",
|
||||||
"legendLink": null
|
"legendLink": null
|
||||||
@@ -371,19 +371,19 @@
|
|||||||
"span": 4,
|
"span": 4,
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-scheduler\"}[$__rate_interval]))",
|
"expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-scheduler|loki\", pod=~\"query-scheduler|loki-read-.*|$namespace-[0-9]*\"}[$__rate_interval]))",
|
||||||
"format": "time_series",
|
"format": "time_series",
|
||||||
"legendFormat": "{{pod}}",
|
"legendFormat": "{{pod}}",
|
||||||
"legendLink": null
|
"legendLink": null
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-scheduler\", resource=\"cpu\"} > 0)",
|
"expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-scheduler|loki\", pod=~\"query-scheduler|loki-read-.*|$namespace-[0-9]*\", resource=\"cpu\"} > 0)",
|
||||||
"format": "time_series",
|
"format": "time_series",
|
||||||
"legendFormat": "request",
|
"legendFormat": "request",
|
||||||
"legendLink": null
|
"legendLink": null
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-scheduler\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-scheduler\"})",
|
"expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-scheduler|loki\", pod=~\"query-scheduler|loki-read-.*|$namespace-[0-9]*\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-scheduler|loki\", pod=~\"query-scheduler|loki-read-.*|$namespace-[0-9]*\"})",
|
||||||
"format": "time_series",
|
"format": "time_series",
|
||||||
"legendFormat": "limit",
|
"legendFormat": "limit",
|
||||||
"legendLink": null
|
"legendLink": null
|
||||||
@@ -473,19 +473,19 @@
|
|||||||
"span": 4,
|
"span": 4,
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-scheduler\"})",
|
"expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-scheduler|loki\", pod=~\"query-scheduler|loki-read-.*|$namespace-[0-9]*\"})",
|
||||||
"format": "time_series",
|
"format": "time_series",
|
||||||
"legendFormat": "{{pod}}",
|
"legendFormat": "{{pod}}",
|
||||||
"legendLink": null
|
"legendLink": null
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-scheduler\", resource=\"memory\"} > 0)",
|
"expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-scheduler|loki\", pod=~\"query-scheduler|loki-read-.*|$namespace-[0-9]*\", resource=\"memory\"} > 0)",
|
||||||
"format": "time_series",
|
"format": "time_series",
|
||||||
"legendFormat": "request",
|
"legendFormat": "request",
|
||||||
"legendLink": null
|
"legendLink": null
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-scheduler\"} > 0)",
|
"expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-scheduler|loki\", pod=~\"query-scheduler|loki-read-.*|$namespace-[0-9]*\"} > 0)",
|
||||||
"format": "time_series",
|
"format": "time_series",
|
||||||
"legendFormat": "limit",
|
"legendFormat": "limit",
|
||||||
"legendLink": null
|
"legendLink": null
|
||||||
@@ -536,7 +536,7 @@
|
|||||||
"span": 4,
|
"span": 4,
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/.*query-scheduler\"})",
|
"expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/(.*query-scheduler|loki-read|loki-single-binary)\"})",
|
||||||
"format": "time_series",
|
"format": "time_series",
|
||||||
"legendFormat": "{{pod}}",
|
"legendFormat": "{{pod}}",
|
||||||
"legendLink": null
|
"legendLink": null
|
||||||
@@ -638,19 +638,19 @@
|
|||||||
},
|
},
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"querier\"}[$__rate_interval]))",
|
"expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"querier|loki\", pod=~\"query-frontend|loki-read-.*|$namespace-[0-9]*\"}[$__rate_interval]))",
|
||||||
"format": "time_series",
|
"format": "time_series",
|
||||||
"legendFormat": "{{pod}}",
|
"legendFormat": "{{pod}}",
|
||||||
"legendLink": null
|
"legendLink": null
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"querier\", resource=\"cpu\"} > 0)",
|
"expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"querier|loki\", pod=~\"query-frontend|loki-read-.*|$namespace-[0-9]*\", resource=\"cpu\"} > 0)",
|
||||||
"format": "time_series",
|
"format": "time_series",
|
||||||
"legendFormat": "request",
|
"legendFormat": "request",
|
||||||
"legendLink": null
|
"legendLink": null
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"querier\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"querier\"})",
|
"expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"querier|loki\", pod=~\"query-frontend|loki-read-.*|$namespace-[0-9]*\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"querier|loki\", pod=~\"query-frontend|loki-read-.*|$namespace-[0-9]*\"})",
|
||||||
"format": "time_series",
|
"format": "time_series",
|
||||||
"legendFormat": "limit",
|
"legendFormat": "limit",
|
||||||
"legendLink": null
|
"legendLink": null
|
||||||
@@ -740,19 +740,19 @@
|
|||||||
},
|
},
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"querier\"})",
|
"expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"querier|loki\", pod=~\"query-frontend|loki-read-.*|$namespace-[0-9]*\"})",
|
||||||
"format": "time_series",
|
"format": "time_series",
|
||||||
"legendFormat": "{{pod}}",
|
"legendFormat": "{{pod}}",
|
||||||
"legendLink": null
|
"legendLink": null
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"querier\", resource=\"memory\"} > 0)",
|
"expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"querier|loki\", pod=~\"query-frontend|loki-read-.*|$namespace-[0-9]*\", resource=\"memory\"} > 0)",
|
||||||
"format": "time_series",
|
"format": "time_series",
|
||||||
"legendFormat": "request",
|
"legendFormat": "request",
|
||||||
"legendLink": null
|
"legendLink": null
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"querier\"} > 0)",
|
"expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"querier|loki\", pod=~\"query-frontend|loki-read-.*|$namespace-[0-9]*\"} > 0)",
|
||||||
"format": "time_series",
|
"format": "time_series",
|
||||||
"legendFormat": "limit",
|
"legendFormat": "limit",
|
||||||
"legendLink": null
|
"legendLink": null
|
||||||
@@ -803,7 +803,7 @@
|
|||||||
},
|
},
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/.*querier\"})",
|
"expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/(.*querier|loki-read|loki-single-binary)\"})",
|
||||||
"format": "time_series",
|
"format": "time_series",
|
||||||
"legendFormat": "{{pod}}",
|
"legendFormat": "{{pod}}",
|
||||||
"legendLink": null
|
"legendLink": null
|
||||||
@@ -854,7 +854,7 @@
|
|||||||
},
|
},
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "sum by(instance, pod, device) (rate(node_disk_written_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) (container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"querier\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 0)\n",
|
"expr": "sum by(instance, pod, device) (rate(node_disk_written_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) (container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-frontend|loki\", pod=~\"query-frontend|loki-read-.*|$namespace-[0-9]*\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 0)\n",
|
||||||
"format": "time_series",
|
"format": "time_series",
|
||||||
"legendFormat": "{{pod}} - {{device}}",
|
"legendFormat": "{{pod}} - {{device}}",
|
||||||
"legendLink": null
|
"legendLink": null
|
||||||
@@ -902,7 +902,7 @@
|
|||||||
},
|
},
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "sum by(instance, pod, device) (rate(node_disk_read_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) (container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"querier\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 0)\n",
|
"expr": "sum by(instance, pod, device) (rate(node_disk_read_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) (container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-frontend|loki\", pod=~\"query-frontend|loki-read-.*|$namespace-[0-9]*\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 0)\n",
|
||||||
"format": "time_series",
|
"format": "time_series",
|
||||||
"legendFormat": "{{pod}} - {{device}}",
|
"legendFormat": "{{pod}} - {{device}}",
|
||||||
"legendLink": null
|
"legendLink": null
|
||||||
@@ -1462,19 +1462,19 @@
|
|||||||
},
|
},
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"bloom-gateway\"}[$__rate_interval]))",
|
"expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"bloom-gateway|loki\", pod=~\"query-frontend|loki-read-.*|$namespace-[0-9]*\"}[$__rate_interval]))",
|
||||||
"format": "time_series",
|
"format": "time_series",
|
||||||
"legendFormat": "{{pod}}",
|
"legendFormat": "{{pod}}",
|
||||||
"legendLink": null
|
"legendLink": null
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"bloom-gateway\", resource=\"cpu\"} > 0)",
|
"expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"bloom-gateway|loki\", pod=~\"query-frontend|loki-read-.*|$namespace-[0-9]*\", resource=\"cpu\"} > 0)",
|
||||||
"format": "time_series",
|
"format": "time_series",
|
||||||
"legendFormat": "request",
|
"legendFormat": "request",
|
||||||
"legendLink": null
|
"legendLink": null
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"bloom-gateway\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"bloom-gateway\"})",
|
"expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"bloom-gateway|loki\", pod=~\"query-frontend|loki-read-.*|$namespace-[0-9]*\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"bloom-gateway|loki\", pod=~\"query-frontend|loki-read-.*|$namespace-[0-9]*\"})",
|
||||||
"format": "time_series",
|
"format": "time_series",
|
||||||
"legendFormat": "limit",
|
"legendFormat": "limit",
|
||||||
"legendLink": null
|
"legendLink": null
|
||||||
@@ -1564,19 +1564,19 @@
|
|||||||
},
|
},
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"bloom-gateway\"})",
|
"expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"bloom-gateway|loki\", pod=~\"query-frontend|loki-read-.*|$namespace-[0-9]*\"})",
|
||||||
"format": "time_series",
|
"format": "time_series",
|
||||||
"legendFormat": "{{pod}}",
|
"legendFormat": "{{pod}}",
|
||||||
"legendLink": null
|
"legendLink": null
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"bloom-gateway\", resource=\"memory\"} > 0)",
|
"expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"bloom-gateway|loki\", pod=~\"query-frontend|loki-read-.*|$namespace-[0-9]*\", resource=\"memory\"} > 0)",
|
||||||
"format": "time_series",
|
"format": "time_series",
|
||||||
"legendFormat": "request",
|
"legendFormat": "request",
|
||||||
"legendLink": null
|
"legendLink": null
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"bloom-gateway\"} > 0)",
|
"expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"bloom-gateway|loki\", pod=~\"query-frontend|loki-read-.*|$namespace-[0-9]*\"} > 0)",
|
||||||
"format": "time_series",
|
"format": "time_series",
|
||||||
"legendFormat": "limit",
|
"legendFormat": "limit",
|
||||||
"legendLink": null
|
"legendLink": null
|
||||||
@@ -1627,7 +1627,7 @@
|
|||||||
},
|
},
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/.*bloom-gateway\"})",
|
"expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/(.*bloom-gateway|loki-read|loki-single-binary)\"})",
|
||||||
"format": "time_series",
|
"format": "time_series",
|
||||||
"legendFormat": "{{pod}}",
|
"legendFormat": "{{pod}}",
|
||||||
"legendLink": null
|
"legendLink": null
|
||||||
@@ -1678,7 +1678,7 @@
|
|||||||
},
|
},
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "sum by(instance, pod, device) (rate(node_disk_written_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) (container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"bloom-gateway\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 0)\n",
|
"expr": "sum by(instance, pod, device) (rate(node_disk_written_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) (container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"bloom-gateway|loki\", pod=~\"query-frontend|loki-read-.*|$namespace-[0-9]*\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 0)\n",
|
||||||
"format": "time_series",
|
"format": "time_series",
|
||||||
"legendFormat": "{{pod}} - {{device}}",
|
"legendFormat": "{{pod}} - {{device}}",
|
||||||
"legendLink": null
|
"legendLink": null
|
||||||
@@ -1726,7 +1726,7 @@
|
|||||||
},
|
},
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "sum by(instance, pod, device) (rate(node_disk_read_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) (container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"bloom-gateway\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 0)\n",
|
"expr": "sum by(instance, pod, device) (rate(node_disk_read_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) (container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"bloom-gateway|loki\", pod=~\"query-frontend|loki-read-.*|$namespace-[0-9]*\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 0)\n",
|
||||||
"format": "time_series",
|
"format": "time_series",
|
||||||
"legendFormat": "{{pod}} - {{device}}",
|
"legendFormat": "{{pod}} - {{device}}",
|
||||||
"legendLink": null
|
"legendLink": null
|
||||||
@@ -2189,19 +2189,19 @@
|
|||||||
},
|
},
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"ruler\"}[$__rate_interval]))",
|
"expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"ruler|loki\", pod=~\"ruler|loki-backend-.*|$namespace-[0-9]*\"}[$__rate_interval]))",
|
||||||
"format": "time_series",
|
"format": "time_series",
|
||||||
"legendFormat": "{{pod}}",
|
"legendFormat": "{{pod}}",
|
||||||
"legendLink": null
|
"legendLink": null
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"ruler\", resource=\"cpu\"} > 0)",
|
"expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"ruler|loki\", pod=~\"ruler|loki-backend-.*|$namespace-[0-9]*\", resource=\"cpu\"} > 0)",
|
||||||
"format": "time_series",
|
"format": "time_series",
|
||||||
"legendFormat": "request",
|
"legendFormat": "request",
|
||||||
"legendLink": null
|
"legendLink": null
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"ruler\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"ruler\"})",
|
"expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"ruler|loki\", pod=~\"ruler|loki-backend-.*|$namespace-[0-9]*\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"ruler|loki\", pod=~\"ruler|loki-backend-.*|$namespace-[0-9]*\"})",
|
||||||
"format": "time_series",
|
"format": "time_series",
|
||||||
"legendFormat": "limit",
|
"legendFormat": "limit",
|
||||||
"legendLink": null
|
"legendLink": null
|
||||||
@@ -2291,19 +2291,19 @@
|
|||||||
},
|
},
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"ruler\"})",
|
"expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"ruler|loki\", pod=~\"ruler|loki-backend-.*|$namespace-[0-9]*\"})",
|
||||||
"format": "time_series",
|
"format": "time_series",
|
||||||
"legendFormat": "{{pod}}",
|
"legendFormat": "{{pod}}",
|
||||||
"legendLink": null
|
"legendLink": null
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"ruler\", resource=\"memory\"} > 0)",
|
"expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"ruler|loki\", pod=~\"ruler|loki-backend-.*|$namespace-[0-9]*\", resource=\"memory\"} > 0)",
|
||||||
"format": "time_series",
|
"format": "time_series",
|
||||||
"legendFormat": "request",
|
"legendFormat": "request",
|
||||||
"legendLink": null
|
"legendLink": null
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"ruler\"} > 0)",
|
"expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"ruler|loki\", pod=~\"ruler|loki-backend-.*|$namespace-[0-9]*\"} > 0)",
|
||||||
"format": "time_series",
|
"format": "time_series",
|
||||||
"legendFormat": "limit",
|
"legendFormat": "limit",
|
||||||
"legendLink": null
|
"legendLink": null
|
||||||
@@ -2354,7 +2354,7 @@
|
|||||||
},
|
},
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/ruler\"})",
|
"expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/(.*ruler|loki-backend|loki-single-binary)\"})",
|
||||||
"format": "time_series",
|
"format": "time_series",
|
||||||
"legendFormat": "{{pod}}",
|
"legendFormat": "{{pod}}",
|
||||||
"legendLink": null
|
"legendLink": null
|
||||||
|
@@ -104,7 +104,7 @@
|
|||||||
"span": 4,
|
"span": 4,
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"distributor\"}[$__rate_interval]))",
|
"expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"distributor|loki\", pod=~\"distributor|loki-write-.*|$namespace-[0-9]*\"}[$__rate_interval]))",
|
||||||
"format": "time_series",
|
"format": "time_series",
|
||||||
"legendFormat": "{{pod}}",
|
"legendFormat": "{{pod}}",
|
||||||
"legendLink": null
|
"legendLink": null
|
||||||
@@ -116,7 +116,7 @@
|
|||||||
"legendLink": null
|
"legendLink": null
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"distributor\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"distributor\"})",
|
"expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"distributor|loki\", pod=~\"distributor|loki-write-.*|$namespace-[0-9]*\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"distributor|loki\", pod=~\"distributor|loki-write-.*|$namespace-[0-9]*\"})",
|
||||||
"format": "time_series",
|
"format": "time_series",
|
||||||
"legendFormat": "limit",
|
"legendFormat": "limit",
|
||||||
"legendLink": null
|
"legendLink": null
|
||||||
@@ -206,7 +206,7 @@
|
|||||||
"span": 4,
|
"span": 4,
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"distributor\"})",
|
"expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"distributor|loki\", pod=~\"distributor|loki-write-.*|$namespace-[0-9]*\"})",
|
||||||
"format": "time_series",
|
"format": "time_series",
|
||||||
"legendFormat": "{{pod}}",
|
"legendFormat": "{{pod}}",
|
||||||
"legendLink": null
|
"legendLink": null
|
||||||
@@ -218,7 +218,7 @@
|
|||||||
"legendLink": null
|
"legendLink": null
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"distributor\"} > 0)",
|
"expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"distributor|loki\", pod=~\"distributor|loki-write-.*|$namespace-[0-9]*\"} > 0)",
|
||||||
"format": "time_series",
|
"format": "time_series",
|
||||||
"legendFormat": "limit",
|
"legendFormat": "limit",
|
||||||
"legendLink": null
|
"legendLink": null
|
||||||
@@ -269,7 +269,7 @@
|
|||||||
"span": 4,
|
"span": 4,
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/.*distributor\"})",
|
"expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/(.*distributor|loki-write|loki-single-binary)\"})",
|
||||||
"format": "time_series",
|
"format": "time_series",
|
||||||
"legendFormat": "{{pod}}",
|
"legendFormat": "{{pod}}",
|
||||||
"legendLink": null
|
"legendLink": null
|
||||||
|
@@ -135,6 +135,11 @@ data:
|
|||||||
}
|
}
|
||||||
|
|
||||||
prometheus.relabel "filter" {
|
prometheus.relabel "filter" {
|
||||||
|
rule {
|
||||||
|
target_label = "cluster"
|
||||||
|
replacement = "{{- .Values.clusterLabelValue -}}"
|
||||||
|
}
|
||||||
|
|
||||||
rule {
|
rule {
|
||||||
source_labels = ["__name__"]
|
source_labels = ["__name__"]
|
||||||
regex = "({{ include "agent.all_metrics" . }})"
|
regex = "({{ include "agent.all_metrics" . }})"
|
||||||
@@ -330,7 +335,7 @@ data:
|
|||||||
{{- if .Values.local.logs.enabled }}
|
{{- if .Values.local.logs.enabled }}
|
||||||
loki.write "local" {
|
loki.write "local" {
|
||||||
endpoint {
|
endpoint {
|
||||||
url = "http://{{- .Release.Namespace -}}-loki-gateway.{{- .Release.Namespace -}}.svc.cluster.local:80/loki/api/v1/push"
|
url = "http://loki-write.{{- .Release.Namespace -}}.svc.cluster.local:3100/loki/api/v1/push"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
{{- end }}
|
{{- end }}
|
||||||
|
13
charts/meta-monitoring/templates/minio/secret.yaml
Normal file
13
charts/meta-monitoring/templates/minio/secret.yaml
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
{{- if .Values.local.minio.createSecret }}
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Secret
|
||||||
|
metadata:
|
||||||
|
name: minio
|
||||||
|
namespace: {{ $.Release.Namespace }}
|
||||||
|
annotations:
|
||||||
|
"helm.sh/hook": pre-install
|
||||||
|
"helm.sh/hook-weight": "-5"
|
||||||
|
data:
|
||||||
|
rootUser: dmFsdWUtMg0KDQo=
|
||||||
|
rootPassword: dmFsdWUtMg0KDQo=
|
||||||
|
{{- end }}
|
@@ -51,7 +51,11 @@ spec:
|
|||||||
protocol: TCP
|
protocol: TCP
|
||||||
envFrom:
|
envFrom:
|
||||||
- secretRef:
|
- secretRef:
|
||||||
|
{{- if .Values.local.minio.enabled }}
|
||||||
|
name: {{ $.Release.Namespace }}-minio
|
||||||
|
{{- else }}
|
||||||
name: minio
|
name: minio
|
||||||
|
{{- end }}
|
||||||
readinessProbe:
|
readinessProbe:
|
||||||
failureThreshold: 3
|
failureThreshold: 3
|
||||||
httpGet:
|
httpGet:
|
||||||
|
@@ -41,3 +41,4 @@
|
|||||||
{{- if empty .Values.metrics.retain -}}
|
{{- if empty .Values.metrics.retain -}}
|
||||||
{{- fail "All metrics will be collected, please specify some in metrics.retain" -}}
|
{{- fail "All metrics will be collected, please specify some in metrics.retain" -}}
|
||||||
{{- end -}}
|
{{- end -}}
|
||||||
|
|
||||||
|
@@ -2,7 +2,7 @@
|
|||||||
namespacesToMonitor:
|
namespacesToMonitor:
|
||||||
- loki
|
- loki
|
||||||
# The name of the cluster where this will be installed
|
# The name of the cluster where this will be installed
|
||||||
clusterLabelValue: "meta-monitoring"
|
clusterLabelValue: "meta"
|
||||||
# Set to true to write logs, metrics or traces to Grafana Cloud
|
# Set to true to write logs, metrics or traces to Grafana Cloud
|
||||||
# The secrets have to be created first
|
# The secrets have to be created first
|
||||||
cloud:
|
cloud:
|
||||||
@@ -26,7 +26,8 @@ local:
|
|||||||
traces:
|
traces:
|
||||||
enabled: false
|
enabled: false
|
||||||
minio:
|
minio:
|
||||||
enabled: false # This should be set to true if any of the previous is enabled
|
enabled: false # This should be set to true if any of the previous is enabled
|
||||||
|
createSecret: false # This is used for testing, do not use in production
|
||||||
grafana:
|
grafana:
|
||||||
version: 10.4.2
|
version: 10.4.2
|
||||||
# Gateway ingress configuration
|
# Gateway ingress configuration
|
||||||
@@ -52,14 +53,14 @@ grafana:
|
|||||||
# port:
|
# port:
|
||||||
# number: TODO
|
# number: TODO
|
||||||
# -- TLS configuration for the gateway ingress. Hosts passed through the `tpl` function to allow templating
|
# -- TLS configuration for the gateway ingress. Hosts passed through the `tpl` function to allow templating
|
||||||
#tls:
|
# tls:
|
||||||
# - secretName: grafana-tls
|
# - secretName: grafana-tls
|
||||||
# hosts:
|
# hosts:
|
||||||
# - monitoring.example.com
|
# - monitoring.example.com
|
||||||
logs:
|
logs:
|
||||||
# Adding regexes here will add a stage.replace block for logs. For more information see
|
# Adding regexes here will add a stage.replace block for logs. For more information see
|
||||||
# https://grafana.com/docs/agent/latest/flow/reference/components/loki.process/#stagereplace-block
|
# https://grafana.com/docs/agent/latest/flow/reference/components/loki.process/#stagereplace-block
|
||||||
piiRegexes: null # This example replaces the word after password with *****
|
piiRegexes: null # This example replaces the word after password with *****
|
||||||
# - expression: "password (\\\\S+)"
|
# - expression: "password (\\\\S+)"
|
||||||
# source: "" # Empty uses the log message
|
# source: "" # Empty uses the log message
|
||||||
# replace: "*****"
|
# replace: "*****"
|
||||||
@@ -149,6 +150,7 @@ metrics:
|
|||||||
- kube_pod_container_resource_requests
|
- kube_pod_container_resource_requests
|
||||||
- kube_pod_container_status_last_terminated_reason
|
- kube_pod_container_status_last_terminated_reason
|
||||||
- kube_pod_container_status_restarts_total
|
- kube_pod_container_status_restarts_total
|
||||||
|
- loki_azure_blob_request_duration_seconds_bucket
|
||||||
- loki_boltdb_shipper_compact_tables_operation_duration_seconds
|
- loki_boltdb_shipper_compact_tables_operation_duration_seconds
|
||||||
- loki_boltdb_shipper_compact_tables_operation_last_successful_run_timestamp_seconds
|
- loki_boltdb_shipper_compact_tables_operation_last_successful_run_timestamp_seconds
|
||||||
- loki_boltdb_shipper_retention_marker_count_total
|
- loki_boltdb_shipper_retention_marker_count_total
|
||||||
@@ -174,11 +176,15 @@ metrics:
|
|||||||
- loki_compactor_deleted_lines
|
- loki_compactor_deleted_lines
|
||||||
- loki_compactor_oldest_pending_delete_request_age_seconds
|
- loki_compactor_oldest_pending_delete_request_age_seconds
|
||||||
- loki_compactor_pending_delete_requests_count
|
- loki_compactor_pending_delete_requests_count
|
||||||
|
- loki_consul_request_duration_seconds_bucket
|
||||||
- loki_discarded_samples_total
|
- loki_discarded_samples_total
|
||||||
- loki_discarded_bytes_total
|
- loki_discarded_bytes_total
|
||||||
- loki_distributor_bytes_received_total
|
- loki_distributor_bytes_received_total
|
||||||
- loki_distributor_lines_received_total
|
- loki_distributor_lines_received_total
|
||||||
- loki_distributor_structured_metadata_bytes_received_total
|
- loki_distributor_structured_metadata_bytes_received_total
|
||||||
|
- loki_gcs_request_duration_seconds_bucket
|
||||||
|
- loki_gcs_request_duration_seconds_count
|
||||||
|
- loki_index_request_duration_seconds_bucket
|
||||||
- loki_index_request_duration_seconds_count
|
- loki_index_request_duration_seconds_count
|
||||||
- loki_ingester_chunk_age_seconds_bucket
|
- loki_ingester_chunk_age_seconds_bucket
|
||||||
- loki_ingester_chunk_age_seconds_count
|
- loki_ingester_chunk_age_seconds_count
|
||||||
@@ -191,6 +197,7 @@ metrics:
|
|||||||
- loki_ingester_chunk_entries_sum
|
- loki_ingester_chunk_entries_sum
|
||||||
- loki_ingester_chunk_size_bytes_bucket
|
- loki_ingester_chunk_size_bytes_bucket
|
||||||
- loki_ingester_chunk_utilization_bucket
|
- loki_ingester_chunk_utilization_bucket
|
||||||
|
- loki_ingester_chunk_utilization_count
|
||||||
- loki_ingester_chunk_utilization_sum
|
- loki_ingester_chunk_utilization_sum
|
||||||
- loki_ingester_chunks_flushed_total
|
- loki_ingester_chunks_flushed_total
|
||||||
- loki_ingester_flush_queue_length
|
- loki_ingester_flush_queue_length
|
||||||
@@ -208,6 +215,8 @@ metrics:
|
|||||||
- loki_ruler_wal_prometheus_remote_storage_samples_total
|
- loki_ruler_wal_prometheus_remote_storage_samples_total
|
||||||
- loki_ruler_wal_samples_appended_total
|
- loki_ruler_wal_samples_appended_total
|
||||||
- loki_ruler_wal_storage_created_series_total
|
- loki_ruler_wal_storage_created_series_total
|
||||||
|
- loki_s3_request_duration_seconds_bucket
|
||||||
|
- loki_s3_request_duration_seconds_count
|
||||||
- loki_write_batch_retries_total
|
- loki_write_batch_retries_total
|
||||||
- loki_write_dropped_bytes_total
|
- loki_write_dropped_bytes_total
|
||||||
- loki_write_dropped_entries_total
|
- loki_write_dropped_entries_total
|
||||||
|
@@ -1,8 +1,12 @@
|
|||||||
# Update the dependencies
|
# Update the dependencies
|
||||||
|
|
||||||
The dependencies are the version of Loki, Mimir, Agent and so on that are included in this chart.
|
The dependencies are the versions of Loki, Mimir, Agent and so on that are included in this chart.
|
||||||
The current versions can be found in the [Chart.yaml](../charts/meta-monitoring/Chart.yaml) file.
|
The current versions can be found in the [Chart.yaml](../charts/meta-monitoring/Chart.yaml) file.
|
||||||
|
|
||||||
|
A GitHub action runs daily to see if updated versions are available. If there are, a PR will be created.
|
||||||
|
|
||||||
|
The manual steps are as follows:
|
||||||
|
|
||||||
Run this in the charts/meta-monitoring directory after updating a dependency:
|
Run this in the charts/meta-monitoring directory after updating a dependency:
|
||||||
|
|
||||||
```
|
```
|
||||||
|
@@ -4,7 +4,7 @@
|
|||||||
|
|
||||||
1. Use an existing Grafana Cloud account or set up a new one. Then create an access token:
|
1. Use an existing Grafana Cloud account or set up a new one. Then create an access token:
|
||||||
|
|
||||||
1. In Grafana go to Administration -> Users and Access -> Cloud access policies.
|
1. In a Grafana instance on Grafana Cloud go to Administration -> Users and Access -> Cloud access policies.
|
||||||
|
|
||||||
1. Click `Create access policy`.
|
1. Click `Create access policy`.
|
||||||
|
|
||||||
@@ -39,7 +39,7 @@
|
|||||||
--from-literal=endpoint='https://otlp-gateway-prod-us-east-0.grafana.net/otlp'
|
--from-literal=endpoint='https://otlp-gateway-prod-us-east-0.grafana.net/otlp'
|
||||||
```
|
```
|
||||||
|
|
||||||
The logs, metrics and traces usernames are the `User / Username / Instance IDs` of the Loki, Prometheus/Mimir and OpenTelemetry instances in Grafana Cloud. From `Home` in Grafana click on `Stacks`. Then go to the `Details` pages of Loki and Prometheus/Mimir. For OpenTelemetry go to the `Configure` page.
|
The logs, metrics and traces usernames are the `User / Username / Instance IDs` of the Loki, Prometheus/Mimir and OpenTelemetry instances in Grafana Cloud. From `Home` in Grafana click on `Stacks`. Then go to the `Details` pages of Loki and Prometheus/Mimir. For OpenTelemetry go to the `Configure` page. The endpoints will also have to be changed to match your settings.
|
||||||
|
|
||||||
1. Create a values.yaml file based on the [default one](../charts/meta-monitoring/values.yaml). Fill in the names of the secrets created above as needed. An example minimal values.yaml looks like this:
|
1. Create a values.yaml file based on the [default one](../charts/meta-monitoring/values.yaml). Fill in the names of the secrets created above as needed. An example minimal values.yaml looks like this:
|
||||||
|
|
||||||
@@ -102,7 +102,7 @@
|
|||||||
enabled: true
|
enabled: true
|
||||||
```
|
```
|
||||||
|
|
||||||
## Installing the chart
|
## Installing, updating and deleting the chart
|
||||||
|
|
||||||
1. Add the repo
|
1. Add the repo
|
||||||
|
|
||||||
@@ -175,7 +175,7 @@ For each of the dashboard files in charts/meta-monitoring/src/dashboards folder
|
|||||||
|
|
||||||
## Configure Loki to send traces
|
## Configure Loki to send traces
|
||||||
|
|
||||||
1. In the Loki config enable tracing:
|
1. In the Loki that is being monitored enable tracing in the config:
|
||||||
|
|
||||||
```
|
```
|
||||||
loki:
|
loki:
|
||||||
@@ -194,4 +194,8 @@ For each of the dashboard files in charts/meta-monitoring/src/dashboards folder
|
|||||||
|
|
||||||
## Configure external access using an Ingress in local mode
|
## Configure external access using an Ingress in local mode
|
||||||
|
|
||||||
When using local mode by default a Kubernetes [Ingress](https://kubernetes.io/docs/concepts/services-networking/ingress/) object is created to access the Grafana instance. This will need to be adapted to your cloud provider by updating the `grafana.ingress` section of the `values.yaml` file provided to Helm. Check the documentation of your cloud provider for available options.
|
When using local mode by default a Kubernetes [Ingress](https://kubernetes.io/docs/concepts/services-networking/ingress/) object is created to access the Grafana instance. This will need to be adapted to your cloud provider by updating the `grafana.ingress` section of the `values.yaml` file provided to Helm. Check the documentation of your cloud provider for available options.
|
||||||
|
|
||||||
|
## Kube-state-metrics
|
||||||
|
|
||||||
|
Metrics about Kubernetes objects are scraped from [kube-state-metrics](https://github.com/kubernetes/kube-state-metrics). This needs to be installed in the cluster. The `kubeStateMetrics.endpoint` entry in values.yaml should be set to its address (without the `/metrics` part in the URL).
|
||||||
|
Reference in New Issue
Block a user