# Chart-level values: what to monitor, where telemetry is written (Grafana
# Cloud and/or a local stack), and which log lines and metrics are retained.

# Specify the namespaces to monitor here
namespacesToMonitor:
  - loki

# The name of the cluster where this will be installed
clusterLabelValue: "meta-monitoring"

# Set to true to write logs, metrics or traces to Grafana Cloud
# The secrets have to be created first
cloud:
  logs:
    enabled: true
    secret: "logs"
  metrics:
    enabled: true
    secret: "metrics"
  traces:
    enabled: true
    secret: "traces"

# Set to true for a local version of logs, metrics or traces
local:
  grafana:
    enabled: false
  logs:
    enabled: false
  metrics:
    enabled: false
  traces:
    enabled: false
  minio:
    enabled: false  # This should be set to true if any of the previous are enabled

grafana:
  # Gateway ingress configuration
  ingress:
    # -- Specifies whether an ingress for the gateway should be created
    enabled: true
    # -- Ingress Class Name. MAY be required for Kubernetes versions >= 1.18
    ingressClassName: ""
    # -- Annotations for the gateway ingress
    annotations: {}
    # -- Labels for the gateway ingress
    labels: {}
    # -- Hosts configuration for the gateway ingress, passed through the `tpl` function to allow templating
    hosts:
      - host: monitoring.example.com
        paths:
          - path: /
            # -- pathType (e.g. ImplementationSpecific, Prefix, .. etc.) might also be required by some Ingress Controllers
            # pathType: Prefix
    # -- TLS configuration for the gateway ingress. Hosts passed through the `tpl` function to allow templating
    # tls:
    #   - secretName: grafana-tls
    #     hosts:
    #       - monitoring.example.com

logs:
  # Adding regexes here will add a stage.replace block for logs. For more information see
  # https://grafana.com/docs/agent/latest/flow/reference/components/loki.process/#stagereplace-block
  #
  # This example replaces the word after password with *****
  # - expression: "password (\\\\S+)"
  #   source: ""  # Empty uses the log message
  #   replace: "*****"
  #
  # Written as an explicit empty list (rather than a bare key, which parses as
  # null) to match extraLogs / extraMetrics below.
  piiRegexes: []

  # The lines matching these will be kept in Loki
  retain:
    # This shows the queries
    - caller=metrics.go
    # This shows any errors
    - level=error
    # Log lines for delete requests
    - delete request for user added
    - Started processing delete request
    - delete request for user marked as processed
    # This shows the ingest requests and is very noisy. Uncomment to include.
    # - caller=push.go

  # Additional log lines to retain
  extraLogs: []

metrics:
  # The list of metrics to retain for logging dashboards
  retain:
    - agent_config_last_load_success_timestamp_seconds
    - agent_config_last_load_successful
    - agent_config_load_failures_total
    - container_cpu_usage_seconds_total
    - container_fs_writes_bytes_total
    - container_memory_working_set_bytes
    - container_network_receive_bytes_total
    - container_network_transmit_bytes_total
    - container_spec_cpu_period
    - container_spec_cpu_quota
    - container_spec_memory_limit_bytes
    - cortex_ingester_flush_queue_length
    - cortex_prometheus_rule_group_iterations_total
    - cortex_prometheus_rule_evaluation_failures_total
    - cortex_prometheus_rule_group_rules
    - cortex_prometheus_rule_group_last_duration_seconds
    - cortex_prometheus_rule_group_last_evaluation_timestamp_seconds
    - cortex_prometheus_rule_group_iterations_missed_total
    - go_gc_duration_seconds
    - go_goroutines
    - go_memstats_heap_inuse_bytes
    - kubelet_volume_stats_used_bytes
    - kubelet_volume_stats_capacity_bytes
    - kube_deployment_created
    - kube_persistentvolumeclaim_labels
    - kube_pod_container_info
    - kube_pod_container_resource_requests
    - kube_pod_container_status_last_terminated_reason
    - kube_pod_container_status_restarts_total
    - loki_boltdb_shipper_compact_tables_operation_duration_seconds
    - loki_boltdb_shipper_compact_tables_operation_last_successful_run_timestamp_seconds
    - loki_boltdb_shipper_retention_marker_count_total
    - loki_boltdb_shipper_retention_marker_table_processed_duration_seconds_bucket
    - loki_boltdb_shipper_retention_marker_table_processed_duration_seconds_count
    - loki_boltdb_shipper_retention_marker_table_processed_duration_seconds_sum
    - loki_boltdb_shipper_retention_marker_table_processed_total
    - loki_boltdb_shipper_request_duration_seconds_bucket
    - loki_boltdb_shipper_request_duration_seconds_count
    - loki_boltdb_shipper_request_duration_seconds_sum
    - loki_boltdb_shipper_retention_sweeper_chunk_deleted_duration_seconds_bucket
    - loki_boltdb_shipper_retention_sweeper_chunk_deleted_duration_seconds_count
    - loki_boltdb_shipper_retention_sweeper_chunk_deleted_duration_seconds_sum
    - loki_boltdb_shipper_retention_sweeper_marker_files_current
    - loki_boltdb_shipper_retention_sweeper_marker_file_processing_current_time
    - loki_build_info
    - loki_chunk_store_deduped_chunks_total
    - loki_chunk_store_index_entries_per_chunk_bucket
    - loki_chunk_store_index_entries_per_chunk_count
    - loki_chunk_store_index_entries_per_chunk_sum
    - loki_compactor_delete_requests_processed_total
    - loki_compactor_delete_requests_received_total
    - loki_compactor_deleted_lines
    - loki_compactor_oldest_pending_delete_request_age_seconds
    - loki_compactor_pending_delete_requests_count
    - loki_discarded_samples_total
    - loki_distributor_bytes_received_total
    - loki_distributor_lines_received_total
    - loki_distributor_structured_metadata_bytes_received_total
    - loki_index_request_duration_seconds_count
    - loki_ingester_chunk_age_seconds_bucket
    - loki_ingester_chunk_age_seconds_count
    - loki_ingester_chunk_age_seconds_sum
    - loki_ingester_chunk_bounds_hours_bucket
    - loki_ingester_chunk_bounds_hours_count
    - loki_ingester_chunk_bounds_hours_sum
    - loki_ingester_chunk_entries_bucket
    - loki_ingester_chunk_entries_count
    - loki_ingester_chunk_entries_sum
    - loki_ingester_chunk_size_bytes_bucket
    - loki_ingester_chunk_utilization_bucket
    - loki_ingester_chunk_utilization_sum
    - loki_ingester_chunks_flushed_total
    - loki_ingester_flush_queue_length
    - loki_ingester_memory_chunks
    - loki_ingester_memory_streams
    - loki_ingester_streams_created_total
    - loki_request_duration_seconds_bucket
    - loki_request_duration_seconds_count
    - loki_request_duration_seconds_sum
    - loki_ruler_wal_appender_ready
    - loki_ruler_wal_disk_size
    - loki_ruler_wal_prometheus_remote_storage_highest_timestamp_in_seconds
    - loki_ruler_wal_prometheus_remote_storage_queue_highest_sent_timestamp_seconds
    - loki_ruler_wal_prometheus_remote_storage_samples_pending
    - loki_ruler_wal_prometheus_remote_storage_samples_total
    - loki_ruler_wal_samples_appended_total
    - loki_ruler_wal_storage_created_series_total
    - loki_write_batch_retries_total
    - loki_write_dropped_bytes_total
    - loki_write_dropped_entries_total
    - loki_write_sent_bytes_total
    - loki_write_sent_entries_total
    - node_disk_read_bytes_total
    - node_disk_written_bytes_total
    - promtail_custom_bad_words_total

  # Additional metrics to retain
  extraMetrics: []

# Set enabled = true to add the default logs/metrics/traces dashboards to the local Grafana
dashboards:
  logs:
    enabled: true
  metrics:
    enabled: true
  traces:
    enabled: true

global:
  # NOTE(review): these are plain-text default credentials; override them for
  # anything other than a throwaway local installation.
  minio:
    rootUser: "rootuser"
    rootPassword: "rootpassword"

kubeStateMetrics:
  # Scrape https://github.com/kubernetes/kube-state-metrics by default
  enabled: true
  # This endpoint is created when the helm chart from
  # https://artifacthub.io/packages/helm/prometheus-community/kube-state-metrics/
  # is used. Change this if kube-state-metrics is installed somewhere else.
  endpoint: "kube-state-metrics.kube-state-metrics.svc.cluster.local:8080"

# The following is the configuration for the dependencies.
# These values should usually not be changed.

# Values passed to the dependency charts. Every component stores its data in
# S3-compatible buckets at "<release>-minio.<namespace>.svc:9000".
#
# The "${rootUser}" / "${rootPassword}" placeholders are left unexpanded here;
# the components that receive "-config.expand-env=true" below substitute them
# from their environment at start-up. Presumably those variables come from the
# "mmc-minio" secret referenced through extraEnvFrom -- confirm against the
# chart templates.

loki:
  loki:
    auth_enabled: false
    schemaConfig:
      configs:
        # Quoted so that no parser turns the date into a timestamp object.
        - from: "2024-03-29"
          store: tsdb
          object_store: s3
          schema: v13
          index:
            prefix: index_
            period: 24h
    storage:
      type: "s3"
      s3:
        insecure: true
        s3ForcePathStyle: true
      bucketNames:
        chunks: loki-chunks
        ruler: loki-ruler
    structuredConfig:
      common:
        storage:
          s3:
            access_key_id: "${rootUser}"
            endpoint: "{{ .Release.Name }}-minio.{{ .Release.Namespace }}.svc:9000"
            secret_access_key: "${rootPassword}"
      compactor:
        retention_enabled: true
        delete_request_store: s3
      limits_config:
        retention_period: 30d
  lokiCanary:
    enabled: false
  test:
    enabled: false
  monitoring:
    dashboards:
      enabled: false
    rules:
      enabled: false
    serviceMonitor:
      enabled: false
    selfMonitoring:
      enabled: false
      grafanaAgent:
        installOperator: false
    lokiCanary:
      enabled: false
  write:
    extraArgs:
      - "-config.expand-env=true"
    extraEnvFrom:
      - secretRef:
          name: "mmc-minio"
  read:
    extraArgs:
      - "-config.expand-env=true"
    extraEnvFrom:
      - secretRef:
          name: "mmc-minio"
  backend:
    extraArgs:
      - "-config.expand-env=true"
    extraEnvFrom:
      - secretRef:
          name: "mmc-minio"

alloy:
  alloy:
    clustering:
      enabled: true
    # The configuration is read from an existing ConfigMap instead of one
    # created by the alloy chart (create: false).
    configMap:
      create: false
      name: "agent-configmap"
      key: "config.river"
    resources:
      requests:
        cpu: "1000m"
        memory: "600Mi"
      limits:
        memory: "4Gi"
    extraPorts:
      - name: "otel"
        port: 4317
        targetPort: 4317
        protocol: "TCP"
      - name: "thrifthttp"
        port: 14268
        targetPort: 14268
        protocol: "TCP"
  controller:
    type: "statefulset"
    autoscaling:
      enabled: true
      minReplicas: 3
      maxReplicas: 30
      targetMemoryUtilizationPercentage: 90
      targetCPUUtilizationPercentage: 90

mimir-distributed:
  # The MinIO bundled with the Mimir chart is switched off; the endpoint
  # configured below is used instead.
  minio:
    enabled: false
  global:
    extraEnvFrom:
      - secretRef:
          name: "mmc-minio"
  mimir:
    structuredConfig:
      alertmanager_storage:
        s3:
          bucket_name: mimir-ruler
      blocks_storage:
        backend: s3
        s3:
          bucket_name: mimir-tsdb
      ruler_storage:
        s3:
          bucket_name: mimir-ruler
      common:
        storage:
          backend: s3
          s3:
            bucket_name: mimir-ruler
            access_key_id: "${rootUser}"
            endpoint: "{{ .Release.Name }}-minio.{{ .Release.Namespace }}.svc:9000"
            secret_access_key: "${rootPassword}"
            insecure: true
      limits:
        compactor_blocks_retention_period: 30d

tempo-distributed:
  tempo:
    structuredConfig:
      storage:
        trace:
          backend: s3
          s3:
            bucket: tempo
            endpoint: "{{ .Release.Name }}-minio.{{ .Release.Namespace }}.svc:9000"
            access_key: "${rootUser}"
            secret_key: "${rootPassword}"
            insecure: true
  distributor:
    extraArgs:
      - "-config.expand-env=true"
    extraEnvFrom:
      - secretRef:
          name: "mmc-minio"
  ingester:
    extraArgs:
      - "-config.expand-env=true"
    extraEnvFrom:
      - secretRef:
          name: "mmc-minio"
  compactor:
    extraArgs:
      - "-config.expand-env=true"
    extraEnvFrom:
      - secretRef:
          name: "mmc-minio"
  querier:
    extraArgs:
      - "-config.expand-env=true"
    extraEnvFrom:
      - secretRef:
          name: "mmc-minio"
  queryFrontend:
    extraArgs:
      - "-config.expand-env=true"
    extraEnvFrom:
      - secretRef:
          name: "mmc-minio"
  traces:
    otlp:
      http:
        enabled: true
      grpc:
        enabled: true

minio:
  existingSecret: "minio"
  # One bucket per storage target named in the loki, mimir and tempo
  # configuration above.
  buckets:
    - name: loki-chunks
      policy: none
      purge: false
    - name: loki-ruler
      policy: none
      purge: false
    - name: tempo
      policy: none
      purge: false
    - name: mimir-ruler
      policy: none
      purge: false
    - name: mimir-tsdb
      policy: none
      purge: false
  mode: standalone
  persistence:
    size: 5Gi
  resources:
    requests:
      cpu: 100m
      memory: 128Mi
  # Changed the mc config path to '/tmp' from '/etc' as '/etc' is only
  # writable by root and OpenShift will not permit this.
  configPathmc: "/tmp/minio/mc/"