diff --git a/charts/meta-monitoring/Chart.lock b/charts/meta-monitoring/Chart.lock index a6a6294..94afcbb 100644 --- a/charts/meta-monitoring/Chart.lock +++ b/charts/meta-monitoring/Chart.lock @@ -8,5 +8,11 @@ dependencies: - name: mimir-distributed repository: https://grafana.github.io/helm-charts version: 4.4.1 -digest: sha256:9238265d064bb85c3607f8be22c32c84837f2115c25978aebf6782b0a125c22c -generated: "2023-06-19T11:29:07.999693+01:00" +- name: tempo-distributed + repository: https://grafana.github.io/helm-charts + version: 1.4.7 +- name: minio + repository: https://charts.min.io + version: 5.0.11 +digest: sha256:4b04084e6fe821c4d481017b2430f7c8cd782a5d60830dd3a24eb8f10a9ece09 +generated: "2023-06-29T14:25:07.247853+01:00" diff --git a/charts/meta-monitoring/Chart.yaml b/charts/meta-monitoring/Chart.yaml index dde9cb7..c816ca3 100644 --- a/charts/meta-monitoring/Chart.yaml +++ b/charts/meta-monitoring/Chart.yaml @@ -34,4 +34,12 @@ dependencies: - name: mimir-distributed repository: https://grafana.github.io/helm-charts version: "4.4.1" + condition: local.enabled +- name: tempo-distributed + repository: https://grafana.github.io/helm-charts + version: "1.4.7" + condition: local.enabled +- name: minio + repository: https://charts.min.io + version: "5.0.11" condition: local.enabled \ No newline at end of file diff --git a/charts/meta-monitoring/charts/minio-5.0.11.tgz b/charts/meta-monitoring/charts/minio-5.0.11.tgz new file mode 100644 index 0000000..1f7dc55 Binary files /dev/null and b/charts/meta-monitoring/charts/minio-5.0.11.tgz differ diff --git a/charts/meta-monitoring/charts/tempo-distributed-1.4.7.tgz b/charts/meta-monitoring/charts/tempo-distributed-1.4.7.tgz new file mode 100644 index 0000000..ba03943 Binary files /dev/null and b/charts/meta-monitoring/charts/tempo-distributed-1.4.7.tgz differ diff --git a/charts/meta-monitoring/templates/agent/_helpers-agent.tpl b/charts/meta-monitoring/templates/agent/_helpers-agent.tpl index 8cf12ae..7665bb6 100644 
--- a/charts/meta-monitoring/templates/agent/_helpers-agent.tpl +++ b/charts/meta-monitoring/templates/agent/_helpers-agent.tpl @@ -26,4 +26,15 @@ {{- $list = append $list ("prometheus.remote_write.cloud.receiver") }} {{- end }} {{- join ", " $list }} +{{- end }} + +{{- define "agent.tempo_write_targets" -}} +{{- $list := list }} +{{- if .Values.local.enabled }} +{{- $list = append $list ("otelcol.exporter.otlp.local.input") }} +{{- end }} +{{- if .Values.cloud.enabled }} +{{- $list = append $list ("otelcol.exporter.otlp.cloud.input") }} +{{- end }} +{{- join ", " $list }} {{- end }} \ No newline at end of file diff --git a/charts/meta-monitoring/templates/agent/config.yaml b/charts/meta-monitoring/templates/agent/config.yaml index 18f1382..3c07a8b 100644 --- a/charts/meta-monitoring/templates/agent/config.yaml +++ b/charts/meta-monitoring/templates/agent/config.yaml @@ -47,11 +47,39 @@ data: forward_to = [ {{ include "agent.prometheus_write_targets" . }} ] } + // Adapted from https://github.com/grafana/intro-to-mlt/blob/main/agent/config.river + otelcol.receiver.otlp "otlp_receiver" { + // We don't technically need this, but it shows how to change listen address and incoming port. + // In this case, the Agent is listening on all available bindable addresses on port 4317 (which is the + // default OTLP gRPC port) for the OTLP protocol. + grpc { + endpoint = "0.0.0.0:4317" + } + + // We define where to send the output of all ingested traces. In this case, to the OpenTelemetry batch processor + // named 'default'. + output { + traces = [otelcol.processor.batch.default.input] + } + } + + // The OpenTelemetry batch processor collects trace spans until a batch size or timeout is met, before sending those + // spans onto another target. This processor is labeled 'default'. + otelcol.processor.batch "default" { + // Flush once 16384 spans have been buffered (send_batch_size counts items, not bytes). + send_batch_size = 16384 + // Or until 2 seconds have elapsed.
+ timeout = "2s" + // Forward each batch to every enabled exporter ('local' and/or 'cloud'), as built by agent.tempo_write_targets. + output { + traces = [ {{ include "agent.tempo_write_targets" . }} ] + } + } {{- if .Values.local.enabled }} loki.write "local" { endpoint { - url = "http://{{- .Release.Name -}}-loki.{{- .Release.Namespace -}}.svc.cluster.local:3100/loki/api/v1/push" + url = "http://loki-gateway.{{- .Release.Namespace -}}.svc.cluster.local:80/loki/api/v1/push" } } @@ -60,6 +88,23 @@ data: url = "http://{{- .Release.Name -}}-mimir-nginx.{{- .Release.Namespace -}}.svc:80/api/v1/push" } } + + // The OpenTelemetry exporter exports processed trace spans to another target that is listening for OTLP format traces. + // A unique label, 'local', is added to uniquely identify this exporter. + otelcol.exporter.otlp "local" { + // Define the client for exporting. + client { + // Send to the Tempo distributor deployed by this release, on port 4317 (OTLP gRPC). + endpoint = "{{- .Release.Name -}}-tempo-distributor.{{- .Release.Namespace -}}.svc:4317" + // Configure TLS settings for communicating with the endpoint. + tls { + // The connection is insecure. + insecure = true + // Do not verify TLS certificates when connecting. + insecure_skip_verify = true + } + } + } {{- end }} {{- if .Values.cloud.enabled }} @@ -84,4 +129,16 @@ data: } } } + + otelcol.exporter.otlp "cloud" { + client { + endpoint = "{{- .Values.cloud.traces.endpoint -}}" + auth = otelcol.auth.basic.creds.handler + } + } + + otelcol.auth.basic "creds" { + username = "{{- .Values.cloud.traces.username -}}" + password = "{{- .Values.cloud.traces.password -}}" + } {{- end }} \ No newline at end of file diff --git a/charts/meta-monitoring/templates/grafana/datasources.yaml b/charts/meta-monitoring/templates/grafana/datasources.yaml index d4e2bc2..fc4f78b 100644 --- a/charts/meta-monitoring/templates/grafana/datasources.yaml +++ b/charts/meta-monitoring/templates/grafana/datasources.yaml @@ -58,4 +58,23 @@ data: # Allows users to edit data sources from the # Grafana UI.
editable: true + - name: Tempo + # Sets the data source type. + type: tempo + # Sets the organization id. Defaults to orgId 1. + orgId: 1 + # Sets a custom UID to reference this + # data source in other parts of the configuration. + # If not specified, Grafana generates one. + uid: tempo_ds + # Sets the data source's URL, including the + # port. + url: http://{{- $.Release.Name -}}-tempo-query-frontend.{{- $.Release.Namespace -}}.svc:3100 + # Toggles whether the data source is pre-selected + # for new panels. You can set only one default + # data source per organization. + isDefault: false + # Allows users to edit data sources from the + # Grafana UI. + editable: true {{- end }} \ No newline at end of file diff --git a/charts/meta-monitoring/values.yaml b/charts/meta-monitoring/values.yaml index 0e102dc..904a280 100644 --- a/charts/meta-monitoring/values.yaml +++ b/charts/meta-monitoring/values.yaml @@ -4,9 +4,9 @@ namespacesToMonitor: - tempo clusterName: "observability" # TODO check if this can be derived local: - enabled: false -cloud: enabled: true +cloud: + enabled: false logs: endpoint: username: @@ -15,19 +15,32 @@ cloud: endpoint: username: password: + traces: + endpoint: + username: + password: + +global: + minio: + rootUser: "rootuser" + rootPassword: "rootpassword" # The following are configuration for the dependencies. # These should not be changed.
+ loki: loki: auth_enabled: false - commonConfig: - replication_factor: 1 storage: - type: 'filesystem' - singleBinary: - replicas: 1 - extraArgs: ["-log.level=debug"] + type: "s3" + s3: + endpoint: "meta-minio.meta.svc:9000" + access_key_id: rootuser + secret_access_key: rootpassword + insecure: true + bucketNames: + chunks: loki-chunks + ruler: loki-ruler monitoring: dashboards: enabled: false @@ -51,4 +64,78 @@ grafana-agent: name: "agent-configmap" key: 'config.river' -# mimir-distributed: +mimir-distributed: + minio: + enabled: false + mimir: + structuredConfig: + alertmanager_storage: + s3: + bucket_name: mimir-ruler + access_key_id: "{{ .Values.global.minio.rootUser }}" + endpoint: "{{ .Release.Name }}-minio.{{ .Release.Namespace }}.svc:9000" + secret_access_key: "{{ .Values.global.minio.rootPassword }}" + insecure: true + blocks_storage: + backend: s3 + s3: + bucket_name: mimir-tsdb + access_key_id: "{{ .Values.global.minio.rootUser }}" + endpoint: "{{ .Release.Name }}-minio.{{ .Release.Namespace }}.svc:9000" + secret_access_key: "{{ .Values.global.minio.rootPassword }}" + insecure: true + ruler_storage: + s3: + bucket_name: mimir-ruler + access_key_id: "{{ .Values.global.minio.rootUser }}" + endpoint: "{{ .Release.Name }}-minio.{{ .Release.Namespace }}.svc:9000" + secret_access_key: "{{ .Values.global.minio.rootPassword }}" + insecure: true + +tempo-distributed: + tempo: + structuredConfig: + storage: + trace: + backend: s3 + s3: + bucket: tempo + endpoint: "{{ .Release.Name }}-minio.{{ .Release.Namespace }}.svc:9000" + access_key: "{{ .Values.global.minio.rootUser }}" + secret_key: "{{ .Values.global.minio.rootPassword }}" + insecure: true + traces: + otlp: + http: + enabled: true + grpc: + enabled: true + +minio: + rootUser: rootuser + rootPassword: rootpassword + buckets: + - name: loki-chunks + policy: none + purge: false + - name: loki-ruler + policy: none + purge: false + - name: tempo + policy: none + purge: false + - name: mimir-ruler + policy:
none + purge: false + - name: mimir-tsdb + policy: none + purge: false + mode: standalone + persistence: + size: 5Gi + resources: + requests: + cpu: 100m + memory: 128Mi + # Keep the mc config path under '/tmp' rather than '/etc', because '/etc' is only writable by root and OpenShift will not permit this. + configPathmc: "/tmp/minio/mc/" \ No newline at end of file