# Patch summary (preamble text ahead of the first "diff --git" header).
#
# charts/meta-monitoring/templates/agent/_helpers-agent.tpl
#   * "agent.all_namespaces" now wraps every entry of .Values.namespacesToMonitor,
#     plus .Release.Namespace, in double quotes and joins them with ", ".
#   * New helper "agent.all_namespaces_bar" takes over the previous unquoted list
#     construction. Its final join/end lines are outside this hunk; presumably the
#     entries are joined with "|" for use as a regex alternation -- confirm in the
#     full template.
#
# charts/meta-monitoring/templates/agent/config.yaml
#   * The `names = [ ... ]` list inside the `namespaces` block switches from
#     "agent.namespaces" to "agent.all_namespaces".
#   * The `regex` of the keep rule on source label "namespace" switches from
#     "agent.all_namespaces" to "agent.all_namespaces_bar".
#
# charts/meta-monitoring/values.yaml
#   * Adds entries to the metrics `retain` list: agent_*, go_gc_duration_seconds_count,
#     loki_discarded_bytes_total, process_start_time_seconds, prometheus_remote_storage_*,
#     prometheus_sd_*, prometheus_target_*, prometheus_wal_watcher_current_segment,
#     promtail_* and traces_* series.
#
# NOTE(review): the line breaks of this patch have been collapsed into spaces, so the
# original indentation of context lines cannot be recovered from this text; the patch
# body below is kept exactly as received.
diff --git a/charts/meta-monitoring/templates/agent/_helpers-agent.tpl b/charts/meta-monitoring/templates/agent/_helpers-agent.tpl index d8d6642..48d84ab 100644 --- a/charts/meta-monitoring/templates/agent/_helpers-agent.tpl +++ b/charts/meta-monitoring/templates/agent/_helpers-agent.tpl @@ -9,6 +9,15 @@ {{- define "agent.all_namespaces" -}} {{- $list := list }} {{- range .Values.namespacesToMonitor }} +{{- $list = append $list (printf "\"%s\"" .) }} +{{- end }} +{{- $list = append $list (printf "\"%s\"" .Release.Namespace) }} +{{- join ", " $list }} +{{- end }} + +{{- define "agent.all_namespaces_bar" -}} +{{- $list := list }} +{{- range .Values.namespacesToMonitor }} {{- $list = append $list (printf "%s" .) }} {{- end }} {{- $list = append $list .Release.Namespace }} diff --git a/charts/meta-monitoring/templates/agent/config.yaml b/charts/meta-monitoring/templates/agent/config.yaml index 55e6963..b356c95 100644 --- a/charts/meta-monitoring/templates/agent/config.yaml +++ b/charts/meta-monitoring/templates/agent/config.yaml @@ -93,7 +93,7 @@ data: role = "pod" namespaces { own_namespace = true - names = [ {{ include "agent.namespaces" . }} ] + names = [ {{ include "agent.all_namespaces" . }} ] } } @@ -143,7 +143,7 @@ data: rule { source_labels = ["namespace"] - regex = "{{ include "agent.all_namespaces" . }}" + regex = "{{ include "agent.all_namespaces_bar" . 
}}" action = "keep" } diff --git a/charts/meta-monitoring/values.yaml b/charts/meta-monitoring/values.yaml index 476cbb6..f100220 100644 --- a/charts/meta-monitoring/values.yaml +++ b/charts/meta-monitoring/values.yaml @@ -75,9 +75,12 @@ logs: metrics: # The list of metrics to retain for logging dashboards retain: + - agent_build_info - agent_config_last_load_success_timestamp_seconds - agent_config_last_load_successful - agent_config_load_failures_total + - agent_wal_samples_appended_total + - agent_wal_storage_active_series - container_cpu_usage_seconds_total - container_fs_writes_bytes_total - container_memory_working_set_bytes @@ -94,6 +97,7 @@ metrics: - cortex_prometheus_rule_group_last_evaluation_timestamp_seconds - cortex_prometheus_rule_group_iterations_missed_total - go_gc_duration_seconds + - go_gc_duration_seconds_count - go_goroutines - go_memstats_heap_inuse_bytes - kubelet_volume_stats_used_bytes @@ -130,6 +134,7 @@ metrics: - loki_compactor_oldest_pending_delete_request_age_seconds - loki_compactor_pending_delete_requests_count - loki_discarded_samples_total + - loki_discarded_bytes_total - loki_distributor_bytes_received_total - loki_distributor_lines_received_total - loki_distributor_structured_metadata_bytes_received_total @@ -169,7 +174,46 @@ metrics: - loki_write_sent_entries_total - node_disk_read_bytes_total - node_disk_written_bytes_total + - process_start_time_seconds + - prometheus_remote_storage_enqueue_retries_total + - prometheus_remote_storage_highest_timestamp_in_seconds + - prometheus_remote_storage_queue_highest_sent_timestamp_seconds + - prometheus_remote_storage_samples_dropped_total + - prometheus_remote_storage_samples_failed_total + - prometheus_remote_storage_samples_pending + - prometheus_remote_storage_samples_retried_total + - prometheus_remote_storage_samples_total + - prometheus_remote_storage_sent_batch_duration_seconds_bucket + - prometheus_remote_storage_sent_batch_duration_seconds_count + - 
prometheus_remote_storage_sent_batch_duration_seconds_sum + - prometheus_remote_storage_shard_capacity + - prometheus_remote_storage_shards + - prometheus_remote_storage_shards_desired + - prometheus_remote_storage_shards_max + - prometheus_remote_storage_shards_min + - prometheus_remote_storage_succeeded_samples_total + - prometheus_sd_discovered_targets + - prometheus_target_interval_length_seconds_count + - prometheus_target_interval_length_seconds_sum + - prometheus_target_scrapes_exceeded_sample_limit_total + - prometheus_target_scrapes_sample_duplicate_timestamp_total + - prometheus_target_scrapes_sample_out_of_bounds_total + - prometheus_target_scrapes_sample_out_of_order_total + - prometheus_target_sync_length_seconds_sum + - prometheus_wal_watcher_current_segment - promtail_custom_bad_words_total + - promtail_dropped_bytes_total + - promtail_files_active_total + - promtail_read_bytes_total + - promtail_read_lines_total + - promtail_request_duration_seconds_bucket + - promtail_sent_entries_total + - traces_exporter_sent_spans + - traces_exporter_send_failed_spans + - traces_loadbalancer_backend_outcome + - traces_loadbalancer_num_backends + - traces_receiver_accepted_spans + - traces_receiver_refused_spans # Additional metrics to retain extraMetrics: [] # Set enabled = true to add the default logs dashboards to the local Grafana