Compare commits

...

11 Commits

| Author | SHA1 | Message | Date |
|--------|------|---------|------|
| Michel Hollands | a01992194b | Use OpenTelemetry token for traces<br>Signed-off-by: Michel Hollands <michel.hollands@gmail.com> | 2024-05-01 17:24:38 +01:00 |
| Michel Hollands | 636b654828 | Add docs on how to setup Loki<br>Signed-off-by: Michel Hollands <michel.hollands@gmail.com> | 2024-05-01 17:20:00 +01:00 |
| Michel Hollands | 5d553e50f6 | Add config for Loki traces to OTEL<br>Signed-off-by: Michel Hollands <michel.hollands@gmail.com> | 2024-05-01 16:54:51 +01:00 |
| Michel Hollands | 3f200115f9 | Consider the meta monitoring namespace for logs as well<br>Signed-off-by: Michel Hollands <michel.hollands@gmail.com> | 2024-05-01 10:16:57 +01:00 |
| Michel Hollands | f0bdf0760d | Merge pull request #82 from grafana/fix_correct_version<br>Update version correctly | 2024-04-29 16:44:48 +01:00 |
| Michel Hollands | 314b1db19b | Update version correctly<br>Signed-off-by: Michel Hollands <michel.hollands@gmail.com> | 2024-04-29 16:44:28 +01:00 |
| Michel Hollands | b547784d54 | Merge pull request #81 from grafana/update_version<br>Update Chart to version 0.0.2 | 2024-04-29 16:40:03 +01:00 |
| Michel Hollands | af4cd1f8c0 | Update to version 2<br>Signed-off-by: Michel Hollands <michel.hollands@gmail.com> | 2024-04-29 16:39:05 +01:00 |
| Michel Hollands | 116119bdc4 | Merge pull request #80 from grafana/fix_logic_for_secrets<br>Fix a few more things | 2024-04-29 16:36:24 +01:00 |
| Michel Hollands | df794115f0 | Fix a few more things<br>Signed-off-by: Michel Hollands <michel.hollands@gmail.com> | 2024-04-29 16:34:21 +01:00 |
| Michel Hollands | c26e509f65 | Merge pull request #79 from grafana/use_updated_dashboards2<br>Cleanup dashboards | 2024-04-29 15:17:15 +01:00 |
8 changed files with 62 additions and 37 deletions

View File

@@ -13,7 +13,7 @@ type: application
# This is the chart version. This version number should be incremented each time you make changes
# to the chart and its templates, including the app version.
# Versions are expected to follow Semantic Versioning (https://semver.org/)
version: 0.0.1
version: 0.0.2
# This is the version number of the application being deployed. This version number should be
# incremented each time you make changes to the application. Versions are not expected to
# follow Semantic Versioning. They should reflect the version the application is using.

View File

@@ -801,7 +801,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(rate({cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$deployment.*\", pod=~\"$pod\", container=~\"$container\" } |logfmt| level=\"$level\" |= \"$filter\" | __error__=\"\" [$__auto])) by (level)",
"expr": "sum(rate({cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$deployment.*\", pod=~\"$pod\", container=~\"$container\" } |logfmt| level=\"$level\" |= \"$filter\" | __error__=\"\" [$__interval])) by (level)",
"intervalFactor": 3,
"legendFormat": "{{level}}",
"refId": "A"

View File

@@ -1997,7 +1997,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(rate({cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(.*distributor.*|(loki|enterprise-logs)-write|loki-single-binary)\"} | logfmt | level=\"error\"[$__auto]))",
"expr": "sum(rate({cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(.*distributor.*|(loki|enterprise-logs)-write|loki-single-binary)\"} | logfmt | level=\"error\"[$__interval]))",
"refId": "A"
}
],
@@ -2690,7 +2690,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(rate({cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(.*ingester.*|(loki|enterprise-logs)-write|loki-single-binary)\"} | logfmt | level=\"error\"[$__auto]))",
"expr": "sum(rate({cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(.*ingester.*|(loki|enterprise-logs)-write|loki-single-binary)\"} | logfmt | level=\"error\"[$__interval]))",
"refId": "A"
}
],
@@ -3635,7 +3635,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(rate({cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(.*querier.*|(loki|enterprise-logs)-read|loki-single-binary)\"} | logfmt | level=\"error\"[$__auto]))",
"expr": "sum(rate({cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(.*querier.*|(loki|enterprise-logs)-read|loki-single-binary)\"} | logfmt | level=\"error\"[$__interval]))",
"refId": "A"
}
],

View File

@@ -9,7 +9,7 @@
{{- define "agent.all_namespaces" -}}
{{- $list := list }}
{{- range .Values.namespacesToMonitor }}
{{- $list = append $list (printf "\"%s\"" .) }}
{{- $list = append $list (printf "%s" .) }}
{{- end }}
{{- $list = append $list .Release.Namespace }}
{{- join "|" $list }}
@@ -51,7 +51,7 @@
{{- $list = append $list ("otelcol.exporter.otlp.local.input") }}
{{- end }}
{{- if .Values.cloud.traces.enabled }}
{{- $list = append $list ("otelcol.exporter.otlp.cloud.input") }}
{{- $list = append $list ("otelcol.exporter.otlphttp.cloud.input") }}
{{- end }}
{{- join ", " $list }}
{{- end }}

View File

@@ -40,10 +40,12 @@ data:
{{- if or .Values.local.logs.enabled .Values.cloud.logs.enabled }}
// Logs
{{- if .Values.cloud.logs.enabled }}
remote.kubernetes.secret "logs_credentials" {
namespace = "{{- $.Release.Namespace -}}"
name = "{{- .Values.cloud.logs.secret -}}"
}
{{- end }}
loki.source.kubernetes "pods" {
clustering {
@@ -80,10 +82,12 @@ data:
{{- if or .Values.local.metrics.enabled .Values.cloud.metrics.enabled }}
// Metrics
{{- if .Values.cloud.metrics.enabled }}
remote.kubernetes.secret "metrics_credentials" {
namespace = "{{- $.Release.Namespace -}}"
name = "{{- .Values.cloud.metrics.secret -}}"
}
{{- end }}
discovery.kubernetes "metric_pods" {
role = "pod"
@@ -139,7 +143,7 @@ data:
rule {
source_labels = ["namespace"]
regex = "{{ include "agent.all_namespaces" . }}""
regex = "{{ include "agent.all_namespaces" . }}"
action = "keep"
}
@@ -278,10 +282,12 @@ data:
{{- if or .Values.local.traces.enabled .Values.cloud.traces.enabled }}
// Traces
{{- if .Values.cloud.traces.enabled }}
remote.kubernetes.secret "traces_credentials" {
namespace = "{{- $.Release.Namespace -}}"
name = "{{- .Values.cloud.traces.secret -}}"
}
{{- end }}
// Shamelessly copied from https://github.com/grafana/intro-to-mlt/blob/main/agent/config.river
otelcol.receiver.otlp "otlp_receiver" {
@@ -289,13 +295,23 @@ data:
// In this case, the Agent is listening on all available bindable addresses on port 4317 (which is the
// default OTLP gRPC port) for the OTLP protocol.
grpc {
endpoint = "0.0.0.0:4317"
endpoint = "0.0.0.0:4317"
}
// We define where to send the output of all ingested traces. In this case, to the OpenTelemetry batch processor
// named 'default'.
output {
traces = [otelcol.processor.batch.default.input]
traces = [otelcol.processor.batch.default.input]
}
}
otelcol.receiver.jaeger "jaeger" {
protocols {
thrift_http {}
}
output {
traces = [otelcol.processor.batch.default.input]
}
}
@@ -316,7 +332,7 @@ data:
{{- if .Values.local.logs.enabled }}
loki.write "local" {
endpoint {
url = "http://loki-gateway.{{- .Release.Namespace -}}.svc.cluster.local:80/loki/api/v1/push"
url = "http://{{- .Release.Namespace -}}-loki-gateway.{{- .Release.Namespace -}}.svc.cluster.local:80/loki/api/v1/push"
}
}
{{- end }}
@@ -329,25 +345,6 @@ data:
}
{{- end }}
{{- if or .Values.local.traces.enabled .Values.cloud.traces.enabled }}
// The OpenTelemetry exporter exports processed trace spans to another target that is listening for OTLP format traces.
// A unique label, 'local', is added to uniquely identify this exporter.
otelcol.exporter.otlp "local" {
// Define the client for exporting.
client {
// Send to the locally running Tempo instance, on port 4317 (OTLP gRPC).
endpoint = "meta-tempo-distributor:4317"
// Configure TLS settings for communicating with the endpoint.
tls {
// The connection is insecure.
insecure = true
// Do not verify TLS certificates when connecting.
insecure_skip_verify = true
}
}
}
{{- end }}
{{- if .Values.cloud.logs.enabled }}
loki.write "cloud" {
endpoint {
@@ -373,7 +370,7 @@ data:
{{- end }}
{{- if .Values.cloud.traces.enabled }}
otelcol.exporter.otlp "cloud" {
otelcol.exporter.otlphttp "cloud" {
client {
endpoint = nonsensitive(remote.kubernetes.secret.traces_credentials.data["endpoint"])
auth = otelcol.auth.basic.creds.handler

View File

@@ -12,7 +12,7 @@ data:
# List of data sources to delete from the database.
deleteDatasources:
- name: Loki
- name: Loki
orgId: 1
# List of data sources to insert/update depending on what's
@@ -32,7 +32,7 @@ data:
uid: loki_ds
# <string> Sets the data source's URL, including the
# port.
url: http://loki-gateway.{{- $.Release.Namespace -}}.svc.cluster.local
url: http://{{- $.Release.Namespace -}}-loki-gateway.{{- $.Release.Namespace -}}.svc.cluster.local
# <bool> Toggles whether the data source is pre-selected
# for new panels. You can set only one default
# data source per organization.

View File

@@ -267,6 +267,15 @@ alloy:
memory: '600Mi'
limits:
memory: '4Gi'
extraPorts:
- name: "otel"
port: 4317
targetPort: 4317
protocol: "TCP"
- name: "thrifthttp"
port: 14268
targetPort: 14268
protocol: "TCP"
controller:
type: "statefulset"
autoscaling:

View File

@@ -34,12 +34,12 @@
--from-literal=endpoint='https://prometheus-us-central1.grafana.net/api/prom/push'
kubectl create secret generic traces -n meta \
--from-literal=username=<traces username> \
--from-literal=username=<OTLP instance ID> \
--from-literal=password=<token>
--from-literal=endpoint='https://tempo-us-central1.grafana.net/tempo'
--from-literal=endpoint='https://otlp-gateway-prod-us-east-0.grafana.net/otlp'
```
The logs, metrics and traces usernames are the `User / Username / Instance IDs` of the Loki, Prometheus/Mimir and Tempo instances in Grafana Cloud. From `Home` in Grafana click on `Stacks`. Then go to the `Details` pages of Loki, Prometheus/Mimir and Tempo.
The logs, metrics and traces usernames are the `User / Username / Instance IDs` of the Loki, Prometheus/Mimir and OpenTelemetry instances in Grafana Cloud. From `Home` in Grafana click on `Stacks`. Then go to the `Details` pages of Loki and Prometheus/Mimir. For OpenTelemetry go to the `Configure` page.
1. Create a values.yaml file based on the [default one](../charts/meta-monitoring/values.yaml). Fill in the names of the secrets created above as needed. An example minimal values.yaml looks like this:
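The chart's documented example is not part of this diff; as a rough sketch, a minimal values.yaml built from the keys used in the templates changed above (`namespacesToMonitor`, `cloud.<signal>.enabled` and `cloud.<signal>.secret`) could look like:
```
# Sketch only: monitor one application namespace and send logs, metrics and
# traces to Grafana Cloud, referencing the secrets created in the previous step
# (assumed here to be named "logs", "metrics" and "traces").
namespacesToMonitor:
  - loki   # replace with the namespace(s) you want to monitor
cloud:
  logs:
    enabled: true
    secret: "logs"
  metrics:
    enabled: true
    secret: "metrics"
  traces:
    enabled: true
    secret: "traces"
```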
@@ -163,4 +163,23 @@ For each of the dashboard files in charts/meta-monitoring/src/dashboards folder
```
mimirtool rules print --address=<your_cloud_prometheus_endpoint> --id=<your_instance_id> --key=<your_cloud_access_policy_token>
```
## Configure Loki to send traces
1. In the Loki config, enable tracing:
```
loki:
  tracing:
    enabled: true
```
1. Add the following environment variables to your Loki binaries. When using the Loki Helm chart these can be added using the `extraEnv` setting for the Loki components (see the first sketch after this list):
   1. `JAEGER_ENDPOINT`: the HTTP address of the mmc-alloy service installed by the meta-monitoring chart, for example "http://mmc-alloy:14268/api/traces"
   1. `JAEGER_AGENT_TAGS`: extra tags you would like to add to the spans, for example 'cluster="abc",namespace="def"'
   1. `JAEGER_SAMPLER_TYPE`: the sampling strategy; to sample everything, use 'const' together with a value of 1 for the next environment variable
   1. `JAEGER_SAMPLER_PARAM`: 1
1. If Loki is installed in a different namespace, you can create an [ExternalName service](https://kubernetes.io/docs/concepts/services-networking/service/#externalname) in Kubernetes to point to the mmc-alloy service in the meta-monitoring namespace (see the second sketch below).
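As a rough sketch of the `extraEnv` wiring, assuming the Loki Helm chart exposes a per-component `extraEnv` list (shown here for a single-binary install; adjust the component key to your deployment mode):
```
# Sketch only: Jaeger environment variables for Loki, added via the Helm chart.
loki:
  tracing:
    enabled: true
singleBinary:
  extraEnv:
    # HTTP address of the mmc-alloy service from the meta-monitoring chart
    - name: JAEGER_ENDPOINT
      value: "http://mmc-alloy:14268/api/traces"
    # Extra tags added to every span
    - name: JAEGER_AGENT_TAGS
      value: 'cluster="abc",namespace="def"'
    # Sample every trace
    - name: JAEGER_SAMPLER_TYPE
      value: "const"
    - name: JAEGER_SAMPLER_PARAM
      value: "1"
```
And a sketch of the ExternalName service for the cross-namespace case, assuming Loki runs in a namespace called `loki` and the meta-monitoring chart is installed in `meta`:
```
# Sketch only: makes "mmc-alloy" resolvable from Loki's namespace by aliasing
# it to the mmc-alloy service in the meta-monitoring namespace.
apiVersion: v1
kind: Service
metadata:
  name: mmc-alloy       # same host name Loki uses in JAEGER_ENDPOINT
  namespace: loki       # the namespace Loki runs in
spec:
  type: ExternalName
  externalName: mmc-alloy.meta.svc.cluster.local
```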