You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

326 lines
10 KiB

3 months ago
local relabelings = import '../addons/dropping-deprecated-metrics-relabelings.libsonnet';
local defaults = {
namespace: error 'must provide namespace',
commonLabels:: {
'app.kubernetes.io/name': 'kube-prometheus',
'app.kubernetes.io/part-of': 'kube-prometheus',
},
mixin: {
ruleLabels: {},
_config: {
cadvisorSelector: 'job="kubelet", metrics_path="/metrics/cadvisor"',
kubeletSelector: 'job="kubelet", metrics_path="/metrics"',
kubeStateMetricsSelector: 'job="kube-state-metrics"',
nodeExporterSelector: 'job="node-exporter"',
kubeSchedulerSelector: 'job="kube-scheduler"',
kubeControllerManagerSelector: 'job="kube-controller-manager"',
kubeApiserverSelector: 'job="apiserver"',
podLabel: 'pod',
runbookURLPattern: 'https://runbooks.prometheus-operator.dev/runbooks/kubernetes/%s',
diskDeviceSelector: 'device=~"mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+"',
hostNetworkInterfaceSelector: 'device!~"veth.+"',
},
},
kubeProxy: false,
};
function(params) {
local k8s = self,
_config:: defaults + params,
mixin:: (import 'github.com/kubernetes-monitoring/kubernetes-mixin/mixin.libsonnet') {
_config+:: k8s._config.mixin._config,
},
prometheusRule: {
apiVersion: 'monitoring.coreos.com/v1',
kind: 'PrometheusRule',
metadata: {
labels: k8s._config.commonLabels + k8s._config.mixin.ruleLabels,
name: 'kubernetes-monitoring-rules',
namespace: k8s._config.namespace,
},
spec: {
local r = if std.objectHasAll(k8s.mixin, 'prometheusRules') then k8s.mixin.prometheusRules.groups else {},
local a = if std.objectHasAll(k8s.mixin, 'prometheusAlerts') then k8s.mixin.prometheusAlerts.groups else {},
groups: a + r,
},
},
serviceMonitorKubeScheduler: {
apiVersion: 'monitoring.coreos.com/v1',
kind: 'ServiceMonitor',
metadata: {
name: 'kube-scheduler',
namespace: k8s._config.namespace,
labels: { 'app.kubernetes.io/name': 'kube-scheduler' },
},
spec: {
jobLabel: 'app.kubernetes.io/name',
endpoints: [{
port: 'https-metrics',
interval: '30s',
scheme: 'https',
bearerTokenFile: '/var/run/secrets/kubernetes.io/serviceaccount/token',
tlsConfig: { insecureSkipVerify: true },
}],
selector: {
matchLabels: { 'app.kubernetes.io/name': 'kube-scheduler' },
},
namespaceSelector: {
matchNames: ['kube-system'],
},
},
},
serviceMonitorKubelet: {
apiVersion: 'monitoring.coreos.com/v1',
kind: 'ServiceMonitor',
metadata: {
name: 'kubelet',
namespace: k8s._config.namespace,
labels: { 'app.kubernetes.io/name': 'kubelet' },
},
spec: {
jobLabel: 'app.kubernetes.io/name',
endpoints: [
{
port: 'https-metrics',
scheme: 'https',
interval: '30s',
honorLabels: true,
tlsConfig: { insecureSkipVerify: true },
bearerTokenFile: '/var/run/secrets/kubernetes.io/serviceaccount/token',
metricRelabelings: relabelings,
relabelings: [{
sourceLabels: ['__metrics_path__'],
targetLabel: 'metrics_path',
}],
},
{
port: 'https-metrics',
scheme: 'https',
path: '/metrics/cadvisor',
interval: '30s',
honorLabels: true,
honorTimestamps: false,
tlsConfig: {
insecureSkipVerify: true,
},
bearerTokenFile: '/var/run/secrets/kubernetes.io/serviceaccount/token',
relabelings: [{
sourceLabels: ['__metrics_path__'],
targetLabel: 'metrics_path',
}],
metricRelabelings: [
// Drop a bunch of metrics which are disabled but still sent, see
// https://github.com/google/cadvisor/issues/1925.
{
sourceLabels: ['__name__'],
regex: 'container_(network_tcp_usage_total|network_udp_usage_total|tasks_state|cpu_load_average_10s)',
action: 'drop',
},
// Drop cAdvisor metrics with no (pod, namespace) labels while preserving ability to monitor system services resource usage (cardinality estimation)
{
sourceLabels: ['__name__', 'pod', 'namespace'],
action: 'drop',
regex: '(' + std.join('|',
[
'container_spec_.*', // everything related to cgroup specification and thus static data (nodes*services*5)
'container_file_descriptors', // file descriptors limits and global numbers are exposed via (nodes*services)
'container_sockets', // used sockets in cgroup. Usually not important for system services (nodes*services)
'container_threads_max', // max number of threads in cgroup. Usually for system services it is not limited (nodes*services)
'container_threads', // used threads in cgroup. Usually not important for system services (nodes*services)
'container_start_time_seconds', // container start. Possibly not needed for system services (nodes*services)
'container_last_seen', // not needed as system services are always running (nodes*services)
]) + ');;',
},
{
sourceLabels: ['__name__', 'container'],
action: 'drop',
regex: '(' + std.join('|',
[
'container_blkio_device_usage_total',
]) + ');.+',
},
],
},
{
port: 'https-metrics',
scheme: 'https',
path: '/metrics/probes',
interval: '30s',
honorLabels: true,
tlsConfig: { insecureSkipVerify: true },
bearerTokenFile: '/var/run/secrets/kubernetes.io/serviceaccount/token',
relabelings: [{
sourceLabels: ['__metrics_path__'],
targetLabel: 'metrics_path',
}],
},
],
selector: {
matchLabels: { 'app.kubernetes.io/name': 'kubelet' },
},
namespaceSelector: {
matchNames: ['kube-system'],
},
},
},
serviceMonitorKubeControllerManager: {
apiVersion: 'monitoring.coreos.com/v1',
kind: 'ServiceMonitor',
metadata: {
name: 'kube-controller-manager',
namespace: k8s._config.namespace,
labels: { 'app.kubernetes.io/name': 'kube-controller-manager' },
},
spec: {
jobLabel: 'app.kubernetes.io/name',
endpoints: [{
port: 'https-metrics',
interval: '30s',
scheme: 'https',
bearerTokenFile: '/var/run/secrets/kubernetes.io/serviceaccount/token',
tlsConfig: {
insecureSkipVerify: true,
},
metricRelabelings: relabelings + [
{
sourceLabels: ['__name__'],
regex: 'etcd_(debugging|disk|request|server).*',
action: 'drop',
},
],
}],
selector: {
matchLabels: { 'app.kubernetes.io/name': 'kube-controller-manager' },
},
namespaceSelector: {
matchNames: ['kube-system'],
},
},
},
serviceMonitorApiserver: {
apiVersion: 'monitoring.coreos.com/v1',
kind: 'ServiceMonitor',
metadata: {
name: 'kube-apiserver',
namespace: k8s._config.namespace,
labels: { 'app.kubernetes.io/name': 'apiserver' },
},
spec: {
jobLabel: 'component',
selector: {
matchLabels: {
component: 'apiserver',
provider: 'kubernetes',
},
},
namespaceSelector: {
matchNames: ['default'],
},
endpoints: [{
port: 'https',
interval: '30s',
scheme: 'https',
tlsConfig: {
caFile: '/var/run/secrets/kubernetes.io/serviceaccount/ca.crt',
serverName: 'kubernetes',
},
bearerTokenFile: '/var/run/secrets/kubernetes.io/serviceaccount/token',
metricRelabelings: relabelings + [
{
sourceLabels: ['__name__'],
regex: 'etcd_(debugging|disk|server).*',
action: 'drop',
},
{
sourceLabels: ['__name__'],
regex: 'apiserver_admission_controller_admission_latencies_seconds_.*',
action: 'drop',
},
{
sourceLabels: ['__name__'],
regex: 'apiserver_admission_step_admission_latencies_seconds_.*',
action: 'drop',
},
{
sourceLabels: ['__name__', 'le'],
regex: 'apiserver_request_duration_seconds_bucket;(0.15|0.25|0.3|0.35|0.4|0.45|0.6|0.7|0.8|0.9|1.25|1.5|1.75|2.5|3|3.5|4.5|6|7|8|9|15|25|30|50)',
action: 'drop',
},
],
}],
},
},
[if (defaults + params).kubeProxy then 'podMonitorKubeProxy']: {
apiVersion: 'monitoring.coreos.com/v1',
kind: 'PodMonitor',
metadata: {
labels: {
'k8s-app': 'kube-proxy',
},
name: 'kube-proxy',
namespace: k8s._config.namespace,
},
spec: {
jobLabel: 'k8s-app',
namespaceSelector: {
matchNames: [
'kube-system',
],
},
selector: {
matchLabels: {
'k8s-app': 'kube-proxy',
},
},
podMetricsEndpoints: [{
honorLabels: true,
targetPort: 10249,
relabelings: [
{
action: 'replace',
regex: '(.*)',
replacement: '$1',
sourceLabels: ['__meta_kubernetes_pod_node_name'],
targetLabel: 'instance',
},
],
}],
},
},
serviceMonitorCoreDNS: {
apiVersion: 'monitoring.coreos.com/v1',
kind: 'ServiceMonitor',
metadata: {
name: 'coredns',
namespace: k8s._config.namespace,
labels: { 'app.kubernetes.io/name': 'coredns' },
},
spec: {
jobLabel: 'app.kubernetes.io/name',
selector: {
matchLabels: { 'k8s-app': 'kube-dns' },
},
namespaceSelector: {
matchNames: ['kube-system'],
},
endpoints: [{
port: 'metrics',
interval: '15s',
bearerTokenFile: '/var/run/secrets/kubernetes.io/serviceaccount/token',
}],
},
},
}