Scrape more metrics from more places

Signed-off-by: Michel Hollands <michel.hollands@gmail.com>
This commit is contained in:
Michel Hollands 2024-05-08 13:06:03 +01:00
parent 890137e7b3
commit f89a6816a8
3 changed files with 55 additions and 2 deletions

View File

@ -9,6 +9,15 @@
{{- define "agent.all_namespaces" -}} {{- define "agent.all_namespaces" -}}
{{- $list := list }} {{- $list := list }}
{{- range .Values.namespacesToMonitor }} {{- range .Values.namespacesToMonitor }}
{{- $list = append $list (printf "\"%s\"" .) }}
{{- end }}
{{- $list = append $list (printf "\"%s\"" .Release.Namespace) }}
{{- join ", " $list }}
{{- end }}
{{- define "agent.all_namespaces_bar" -}}
{{- $list := list }}
{{- range .Values.namespacesToMonitor }}
{{- $list = append $list (printf "%s" .) }} {{- $list = append $list (printf "%s" .) }}
{{- end }} {{- end }}
{{- $list = append $list .Release.Namespace }} {{- $list = append $list .Release.Namespace }}

View File

@ -93,7 +93,7 @@ data:
role = "pod" role = "pod"
namespaces { namespaces {
own_namespace = true own_namespace = true
names = [ {{ include "agent.namespaces" . }} ] names = [ {{ include "agent.all_namespaces" . }} ]
} }
} }
@ -143,7 +143,7 @@ data:
rule { rule {
source_labels = ["namespace"] source_labels = ["namespace"]
regex = "{{ include "agent.all_namespaces" . }}" regex = "{{ include "agent.all_namespaces_bar" . }}"
action = "keep" action = "keep"
} }

View File

@ -75,9 +75,12 @@ logs:
metrics: metrics:
# The list of metrics to retain for logging dashboards # The list of metrics to retain for logging dashboards
retain: retain:
- agent_build_info
- agent_config_last_load_success_timestamp_seconds - agent_config_last_load_success_timestamp_seconds
- agent_config_last_load_successful - agent_config_last_load_successful
- agent_config_load_failures_total - agent_config_load_failures_total
- agent_wal_samples_appended_total
- agent_wal_storage_active_series
- container_cpu_usage_seconds_total - container_cpu_usage_seconds_total
- container_fs_writes_bytes_total - container_fs_writes_bytes_total
- container_memory_working_set_bytes - container_memory_working_set_bytes
@ -94,6 +97,7 @@ metrics:
- cortex_prometheus_rule_group_last_evaluation_timestamp_seconds - cortex_prometheus_rule_group_last_evaluation_timestamp_seconds
- cortex_prometheus_rule_group_iterations_missed_total - cortex_prometheus_rule_group_iterations_missed_total
- go_gc_duration_seconds - go_gc_duration_seconds
- go_gc_duration_seconds_count
- go_goroutines - go_goroutines
- go_memstats_heap_inuse_bytes - go_memstats_heap_inuse_bytes
- kubelet_volume_stats_used_bytes - kubelet_volume_stats_used_bytes
@ -130,6 +134,7 @@ metrics:
- loki_compactor_oldest_pending_delete_request_age_seconds - loki_compactor_oldest_pending_delete_request_age_seconds
- loki_compactor_pending_delete_requests_count - loki_compactor_pending_delete_requests_count
- loki_discarded_samples_total - loki_discarded_samples_total
- loki_discarded_bytes_total
- loki_distributor_bytes_received_total - loki_distributor_bytes_received_total
- loki_distributor_lines_received_total - loki_distributor_lines_received_total
- loki_distributor_structured_metadata_bytes_received_total - loki_distributor_structured_metadata_bytes_received_total
@ -169,7 +174,46 @@ metrics:
- loki_write_sent_entries_total - loki_write_sent_entries_total
- node_disk_read_bytes_total - node_disk_read_bytes_total
- node_disk_written_bytes_total - node_disk_written_bytes_total
- process_start_time_seconds
- prometheus_remote_storage_enqueue_retries_total
- prometheus_remote_storage_highest_timestamp_in_seconds
- prometheus_remote_storage_queue_highest_sent_timestamp_seconds
- prometheus_remote_storage_samples_dropped_total
- prometheus_remote_storage_samples_failed_total
- prometheus_remote_storage_samples_pending
- prometheus_remote_storage_samples_retried_total
- prometheus_remote_storage_samples_total
- prometheus_remote_storage_sent_batch_duration_seconds_bucket
- prometheus_remote_storage_sent_batch_duration_seconds_count
- prometheus_remote_storage_sent_batch_duration_seconds_sum
- prometheus_remote_storage_shard_capacity
- prometheus_remote_storage_shards
- prometheus_remote_storage_shards_desired
- prometheus_remote_storage_shards_max
- prometheus_remote_storage_shards_min
- prometheus_remote_storage_succeeded_samples_total
- prometheus_sd_discovered_targets
- prometheus_target_interval_length_seconds_count
- prometheus_target_interval_length_seconds_sum
- prometheus_target_scrapes_exceeded_sample_limit_total
- prometheus_target_scrapes_sample_duplicate_timestamp_total
- prometheus_target_scrapes_sample_out_of_bounds_total
- prometheus_target_scrapes_sample_out_of_order_total
- prometheus_target_sync_length_seconds_sum
- prometheus_wal_watcher_current_segment
- promtail_custom_bad_words_total - promtail_custom_bad_words_total
- promtail_dropped_bytes_total
- promtail_files_active_total
- promtail_read_bytes_total
- promtail_read_lines_total
- promtail_request_duration_seconds_bucket
- promtail_sent_entries_total
- traces_exporter_sent_spans
- traces_exporter_send_failed_spans
- traces_loadbalancer_backend_outcome
- traces_loadbalancer_num_backends
- traces_receiver_accepted_spans
- traces_receiver_refused_spans
# Additional metrics to retain # Additional metrics to retain
extraMetrics: [] extraMetrics: []
# Set enabled = true to add the default logs dashboards to the local Grafana # Set enabled = true to add the default logs dashboards to the local Grafana