forked from RemoteSync/grafana-meta-monitoring-chart
Compare commits
11 Commits
use_update
...
open_extra
Author | SHA1 | Date | |
---|---|---|---|
|
a01992194b | ||
|
636b654828 | ||
|
5d553e50f6 | ||
|
3f200115f9 | ||
|
f0bdf0760d | ||
|
314b1db19b | ||
|
b547784d54 | ||
|
af4cd1f8c0 | ||
|
116119bdc4 | ||
|
df794115f0 | ||
|
c26e509f65 |
@@ -13,7 +13,7 @@ type: application
|
||||
# This is the chart version. This version number should be incremented each time you make changes
|
||||
# to the chart and its templates, including the app version.
|
||||
# Versions are expected to follow Semantic Versioning (https://semver.org/)
|
||||
version: 0.0.1
|
||||
version: 0.0.2
|
||||
# This is the version number of the application being deployed. This version number should be
|
||||
# incremented each time you make changes to the application. Versions are not expected to
|
||||
# follow Semantic Versioning. They should reflect the version the application is using.
|
||||
|
@@ -801,7 +801,7 @@
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(rate({cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$deployment.*\", pod=~\"$pod\", container=~\"$container\" } |logfmt| level=\"$level\" |= \"$filter\" | __error__=\"\" [$__auto])) by (level)",
|
||||
"expr": "sum(rate({cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$deployment.*\", pod=~\"$pod\", container=~\"$container\" } |logfmt| level=\"$level\" |= \"$filter\" | __error__=\"\" [$__interval])) by (level)",
|
||||
"intervalFactor": 3,
|
||||
"legendFormat": "{{level}}",
|
||||
"refId": "A"
|
||||
|
@@ -1997,7 +1997,7 @@
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(rate({cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(.*distributor.*|(loki|enterprise-logs)-write|loki-single-binary)\"} | logfmt | level=\"error\"[$__auto]))",
|
||||
"expr": "sum(rate({cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(.*distributor.*|(loki|enterprise-logs)-write|loki-single-binary)\"} | logfmt | level=\"error\"[$__interval]))",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
@@ -2690,7 +2690,7 @@
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(rate({cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(.*ingester.*|(loki|enterprise-logs)-write|loki-single-binary)\"} | logfmt | level=\"error\"[$__auto]))",
|
||||
"expr": "sum(rate({cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(.*ingester.*|(loki|enterprise-logs)-write|loki-single-binary)\"} | logfmt | level=\"error\"[$__interval]))",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
@@ -3635,7 +3635,7 @@
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(rate({cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(.*querier.*|(loki|enterprise-logs)-read|loki-single-binary)\"} | logfmt | level=\"error\"[$__auto]))",
|
||||
"expr": "sum(rate({cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(.*querier.*|(loki|enterprise-logs)-read|loki-single-binary)\"} | logfmt | level=\"error\"[$__interval]))",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
|
@@ -9,7 +9,7 @@
|
||||
{{- define "agent.all_namespaces" -}}
|
||||
{{- $list := list }}
|
||||
{{- range .Values.namespacesToMonitor }}
|
||||
{{- $list = append $list (printf "\"%s\"" .) }}
|
||||
{{- $list = append $list (printf "%s" .) }}
|
||||
{{- end }}
|
||||
{{- $list = append $list .Release.Namespace }}
|
||||
{{- join "|" $list }}
|
||||
@@ -51,7 +51,7 @@
|
||||
{{- $list = append $list ("otelcol.exporter.otlp.local.input") }}
|
||||
{{- end }}
|
||||
{{- if .Values.cloud.traces.enabled }}
|
||||
{{- $list = append $list ("otelcol.exporter.otlp.cloud.input") }}
|
||||
{{- $list = append $list ("otelcol.exporter.otlphttp.cloud.input") }}
|
||||
{{- end }}
|
||||
{{- join ", " $list }}
|
||||
{{- end }}
|
@@ -40,10 +40,12 @@ data:
|
||||
{{- if or .Values.local.logs.enabled .Values.cloud.logs.enabled }}
|
||||
// Logs
|
||||
|
||||
{{- if .Values.cloud.logs.enabled }}
|
||||
remote.kubernetes.secret "logs_credentials" {
|
||||
namespace = "{{- $.Release.Namespace -}}"
|
||||
name = "{{- .Values.cloud.logs.secret -}}"
|
||||
}
|
||||
{{- end }}
|
||||
|
||||
loki.source.kubernetes "pods" {
|
||||
clustering {
|
||||
@@ -80,10 +82,12 @@ data:
|
||||
{{- if or .Values.local.metrics.enabled .Values.cloud.metrics.enabled }}
|
||||
// Metrics
|
||||
|
||||
{{- if .Values.cloud.metrics.enabled }}
|
||||
remote.kubernetes.secret "metrics_credentials" {
|
||||
namespace = "{{- $.Release.Namespace -}}"
|
||||
name = "{{- .Values.cloud.metrics.secret -}}"
|
||||
}
|
||||
{{- end }}
|
||||
|
||||
discovery.kubernetes "metric_pods" {
|
||||
role = "pod"
|
||||
@@ -139,7 +143,7 @@ data:
|
||||
|
||||
rule {
|
||||
source_labels = ["namespace"]
|
||||
regex = "{{ include "agent.all_namespaces" . }}""
|
||||
regex = "{{ include "agent.all_namespaces" . }}"
|
||||
|
||||
action = "keep"
|
||||
}
|
||||
@@ -278,10 +282,12 @@ data:
|
||||
{{- if or .Values.local.traces.enabled .Values.cloud.traces.enabled }}
|
||||
// Traces
|
||||
|
||||
{{- if .Values.cloud.traces.enabled }}
|
||||
remote.kubernetes.secret "traces_credentials" {
|
||||
namespace = "{{- $.Release.Namespace -}}"
|
||||
name = "{{- .Values.cloud.traces.secret -}}"
|
||||
}
|
||||
{{- end }}
|
||||
|
||||
// Shamelessly copied from https://github.com/grafana/intro-to-mlt/blob/main/agent/config.river
|
||||
otelcol.receiver.otlp "otlp_receiver" {
|
||||
@@ -289,13 +295,23 @@ data:
|
||||
// In this case, the Agent is listening on all available bindable addresses on port 4317 (which is the
|
||||
// default OTLP gRPC port) for the OTLP protocol.
|
||||
grpc {
|
||||
endpoint = "0.0.0.0:4317"
|
||||
endpoint = "0.0.0.0:4317"
|
||||
}
|
||||
|
||||
// We define where to send the output of all ingested traces. In this case, to the OpenTelemetry batch processor
|
||||
// named 'default'.
|
||||
output {
|
||||
traces = [otelcol.processor.batch.default.input]
|
||||
traces = [otelcol.processor.batch.default.input]
|
||||
}
|
||||
}
|
||||
|
||||
otelcol.receiver.jaeger "jaeger" {
|
||||
protocols {
|
||||
thrift_http {}
|
||||
}
|
||||
|
||||
output {
|
||||
traces = [otelcol.processor.batch.default.input]
|
||||
}
|
||||
}
|
||||
|
||||
@@ -316,7 +332,7 @@ data:
|
||||
{{- if .Values.local.logs.enabled }}
|
||||
loki.write "local" {
|
||||
endpoint {
|
||||
url = "http://loki-gateway.{{- .Release.Namespace -}}.svc.cluster.local:80/loki/api/v1/push"
|
||||
url = "http://{{- .Release.Namespace -}}-loki-gateway.{{- .Release.Namespace -}}.svc.cluster.local:80/loki/api/v1/push"
|
||||
}
|
||||
}
|
||||
{{- end }}
|
||||
@@ -329,25 +345,6 @@ data:
|
||||
}
|
||||
{{- end }}
|
||||
|
||||
{{- if or .Values.local.traces.enabled .Values.cloud.traces.enabled }}
|
||||
// The OpenTelemetry exporter exports processed trace spans to another target that is listening for OTLP format traces.
|
||||
// A unique label, 'local', is added to uniquely identify this exporter.
|
||||
otelcol.exporter.otlp "local" {
|
||||
// Define the client for exporting.
|
||||
client {
|
||||
// Send to the locally running Tempo instance, on port 4317 (OTLP gRPC).
|
||||
endpoint = "meta-tempo-distributor:4317"
|
||||
// Configure TLS settings for communicating with the endpoint.
|
||||
tls {
|
||||
// The connection is insecure.
|
||||
insecure = true
|
||||
// Do not verify TLS certificates when connecting.
|
||||
insecure_skip_verify = true
|
||||
}
|
||||
}
|
||||
}
|
||||
{{- end }}
|
||||
|
||||
{{- if .Values.cloud.logs.enabled }}
|
||||
loki.write "cloud" {
|
||||
endpoint {
|
||||
@@ -373,7 +370,7 @@ data:
|
||||
{{- end }}
|
||||
|
||||
{{- if .Values.cloud.traces.enabled }}
|
||||
otelcol.exporter.otlp "cloud" {
|
||||
otelcol.exporter.otlphttp "cloud" {
|
||||
client {
|
||||
endpoint = nonsensitive(remote.kubernetes.secret.traces_credentials.data["endpoint"])
|
||||
auth = otelcol.auth.basic.creds.handler
|
||||
|
@@ -12,7 +12,7 @@ data:
|
||||
|
||||
# List of data sources to delete from the database.
|
||||
deleteDatasources:
|
||||
- name: Loki
|
||||
- name: Loki
|
||||
orgId: 1
|
||||
|
||||
# List of data sources to insert/update depending on what's
|
||||
@@ -32,7 +32,7 @@ data:
|
||||
uid: loki_ds
|
||||
# <string> Sets the data source's URL, including the
|
||||
# port.
|
||||
url: http://loki-gateway.{{- $.Release.Namespace -}}.svc.cluster.local
|
||||
url: http://{{- $.Release.Namespace -}}-loki-gateway.{{- $.Release.Namespace -}}.svc.cluster.local
|
||||
# <bool> Toggles whether the data source is pre-selected
|
||||
# for new panels. You can set only one default
|
||||
# data source per organization.
|
||||
|
@@ -267,6 +267,15 @@ alloy:
|
||||
memory: '600Mi'
|
||||
limits:
|
||||
memory: '4Gi'
|
||||
extraPorts:
|
||||
- name: "otel"
|
||||
port: 4317
|
||||
targetPort: 4317
|
||||
protocol: "TCP"
|
||||
- name: "thrifthttp"
|
||||
port: 14268
|
||||
targetPort: 14268
|
||||
protocol: "TCP"
|
||||
controller:
|
||||
type: "statefulset"
|
||||
autoscaling:
|
||||
|
@@ -34,12 +34,12 @@
|
||||
--from-literal=endpoint='https://prometheus-us-central1.grafana.net/api/prom/push'
|
||||
|
||||
kubectl create secret generic traces -n meta \
|
||||
--from-literal=username=<traces username> \
|
||||
--from-literal=username=<OTLP instance ID> \
|
||||
--from-literal=password=<token> \
|
||||
--from-literal=endpoint='https://tempo-us-central1.grafana.net/tempo'
|
||||
--from-literal=endpoint='https://otlp-gateway-prod-us-east-0.grafana.net/otlp'
|
||||
```
|
||||
|
||||
The logs, metrics and traces usernames are the `User / Username / Instance IDs` of the Loki, Prometheus/Mimir and Tempo instances in Grafana Cloud. From `Home` in Grafana click on `Stacks`. Then go to the `Details` pages of Loki, Prometheus/Mimir and Tempo.
|
||||
The logs, metrics and traces usernames are the `User / Username / Instance IDs` of the Loki, Prometheus/Mimir and OpenTelemetry instances in Grafana Cloud. From `Home` in Grafana, click on `Stacks`. Then go to the `Details` pages of Loki and Prometheus/Mimir. For OpenTelemetry, go to the `Configure` page.
|
||||
|
||||
1. Create a values.yaml file based on the [default one](../charts/meta-monitoring/values.yaml). Fill in the names of the secrets created above as needed. An example minimal values.yaml looks like this:
|
||||
|
||||
@@ -163,4 +163,23 @@ For each of the dashboard files in charts/meta-monitoring/src/dashboards folder
|
||||
|
||||
```
|
||||
mimirtool rules print --address=<your_cloud_prometheus_endpoint> --id=<your_instance_id> --key=<your_cloud_access_policy_token>
|
||||
```
|
||||
```
|
||||
|
||||
## Configure Loki to send traces
|
||||
|
||||
1. In the Loki config enable tracing:
|
||||
|
||||
```
|
||||
loki:
|
||||
tracing:
|
||||
enabled: true
|
||||
```
|
||||
|
||||
1. Add the following environment variables to your Loki binaries. When using the Loki Helm chart these can be added using the `extraEnv` setting for the Loki components.
|
||||
|
||||
1. JAEGER_ENDPOINT: the HTTP address of the mmc-alloy service installed by the meta-monitoring chart, for example `http://mmc-alloy:14268/api/traces`
|
||||
1. JAEGER_AGENT_TAGS: extra tags you would like to add to the spans, for example 'cluster="abc",namespace="def"'
|
||||
1. JAEGER_SAMPLER_TYPE: the sampling strategy. For example, to sample every trace, use 'const' and set the next environment variable (JAEGER_SAMPLER_PARAM) to 1
|
||||
1. JAEGER_SAMPLER_PARAM: 1
|
||||
|
||||
1. If Loki is installed in a different namespace, you can create an [ExternalName service](https://kubernetes.io/docs/concepts/services-networking/service/#externalname) in Kubernetes that points to the mmc-alloy service in the meta-monitoring namespace.
|
||||
|
Reference in New Issue
Block a user