// Copyright 2019 The prometheus-operator Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package e2e

import (
	"context"
	"log"
	"os"
	"strings"
	"testing"
	"time"

	"github.com/Jeffail/gabs"
	"github.com/pkg/errors"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/util/wait"
	"k8s.io/client-go/kubernetes"
	"k8s.io/client-go/tools/clientcmd"
)

// promClient is initialized once in testMain and shared by all tests in the
// package.
var promClient *prometheusClient

func TestMain(m *testing.M) {
	os.Exit(testMain(m))
}

// testMain circumvents the issue that one cannot call `defer` in TestMain
// because `os.Exit` does not honor `defer` statements. For more details see:
// http://blog.englund.nu/golang,/testing/2017/03/12/using-defer-in-testmain.html
func testMain(m *testing.M) int {
	kubeConfigPath, ok := os.LookupEnv("KUBECONFIG")
	if !ok {
		log.Fatal("failed to retrieve KUBECONFIG env var")
	}

	config, err := clientcmd.BuildConfigFromFlags("", kubeConfigPath)
	if err != nil {
		log.Fatal(err)
	}

	kubeClient, err := kubernetes.NewForConfig(config)
	if err != nil {
		log.Fatal(errors.Wrap(err, "creating kubeClient failed"))
	}

	promClient = newPrometheusClient(kubeClient)

	return m.Run()
}

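// Example invocation of this suite, assuming a reachable cluster with
// kube-prometheus already deployed (the package path is an assumption and
// may differ per checkout):
//
//	KUBECONFIG=$HOME/.kube/config go test -v -count=1 ./tests/e2e
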
func TestQueryPrometheus(t *testing.T) {
	queries := []struct {
		query   string
		expectN int
	}{
		{
			query:   `up{job="node-exporter"} == 1`,
			expectN: 1,
		}, {
			// query:   `up{job="kubelet"} == 1`,
			// expectN: 1,
			// }, {
			query:   `up{job="apiserver"} == 1`,
			expectN: 1,
		}, {
			query:   `up{job="kube-state-metrics"} == 1`,
			expectN: 1,
		}, {
			query:   `up{job="prometheus-k8s"} == 1`,
			expectN: 1,
		}, {
			query:   `up{job="prometheus-operator"} == 1`,
			expectN: 1,
		}, {
			query:   `up{job="alertmanager-main"} == 1`,
			expectN: 2,
		},
	}

	// Wait for the Prometheus pods to respond to queries at all, then start
	// verifying the results.
	err := wait.Poll(5*time.Second, 1*time.Minute, func() (bool, error) {
		_, err := promClient.query("up")
		return err == nil, nil
	})
	if err != nil {
		t.Fatal(errors.Wrap(err, "wait for prometheus-k8s"))
	}

	err = wait.Poll(5*time.Second, 1*time.Minute, func() (bool, error) {
		defer t.Log("---------------------------\n")

		for _, q := range queries {
			n, err := promClient.query(q.query)
			if err != nil {
				return false, err
			}
			if n < q.expectN {
				// Don't return an error as targets may only become visible after a while.
				t.Logf("expected at least %d results for %q but got %d", q.expectN, q.query, n)
				return false, nil
			}
			t.Logf("query %q succeeded", q.query)
		}

		return true, nil
	})
	if err != nil {
		t.Fatal(err)
	}
}

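// Note: wait.Poll is deprecated in recent client-go releases. A minimal
// sketch of the equivalent first poll above, assuming client-go >= v0.27:
//
//	err := wait.PollUntilContextTimeout(context.Background(), 5*time.Second, time.Minute, false,
//		func(ctx context.Context) (bool, error) {
//			_, err := promClient.query("up")
//			return err == nil, nil
//		})
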
func TestDroppedMetrics(t *testing.T) {
	// Query the metadata for all metrics.
	md, err := promClient.metadata("{job=~\".+\"}")
	if err != nil {
		t.Fatal(err)
	}
	for _, k := range md {
		// Check whether the metric's help text marks it as deprecated.
		if strings.Contains(k.Help, "Deprecated") {
			// Query Prometheus for the deprecated metric.
			n, err := promClient.query(k.Metric)
			if err != nil {
				t.Fatal(err)
			}
			if n > 0 {
				t.Fatalf("deprecated metric %q with help text %q exists", k.Metric, k.Help)
			}
		}
	}
}

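// For reference, the selector above corresponds to Prometheus' target
// metadata endpoint, assuming promClient.metadata wraps it:
//
//	GET /api/v1/targets/metadata?match_target={job=~".+"}
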
func TestTargetsScheme(t *testing.T) {
	// Query the targets for all endpoints.
	tgs, err := promClient.targets()
	if err != nil {
		t.Fatal(err)
	}

	// Exclude these jobs from the check for HTTP endpoints.
	// TODO(paulfantom): This should be reduced as we secure connections for those components
	exclude := map[string]bool{
		"alertmanager-main": true,
		"prometheus-k8s":    true,
		"kube-dns":          true,
		"grafana":           true,
	}

	for _, k := range tgs.Active {
		job := k.Labels["job"]
		if k.DiscoveredLabels["__scheme__"] == "http" && !exclude[string(job)] {
			t.Fatalf("target exposing metrics over HTTP instead of HTTPS: %+v", k)
		}
	}
}

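// The same check can be reproduced by hand against the Prometheus HTTP API,
// assuming a port-forward to the prometheus-k8s service on localhost:9090:
//
//	curl -s http://localhost:9090/api/v1/targets | \
//		jq '.data.activeTargets[] | {job: .labels.job, scheme: .discoveredLabels.__scheme__}'
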
// TestFailedRuleEvaluations detects recording and alerting rules that may
// trigger "many-to-many" evaluation errors when multiple kube-state-metrics
// instances are running.
func TestFailedRuleEvaluations(t *testing.T) {
	// Scale kube-state-metrics to 2 replicas.
	kClient := promClient.kubeClient

	scale, err := kClient.AppsV1().Deployments("monitoring").GetScale(context.Background(), "kube-state-metrics", metav1.GetOptions{})
	if err != nil {
		t.Fatal(err)
	}

	scale.Spec.Replicas = 2
	scale, err = kClient.AppsV1().Deployments("monitoring").UpdateScale(context.Background(), "kube-state-metrics", scale, metav1.UpdateOptions{})
	if err != nil {
		t.Fatal(err)
	}

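	// Equivalent manual scaling step, useful when debugging outside the test:
	//
	//	kubectl -n monitoring scale deployment kube-state-metrics --replicas=2
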
	// Roll back to 1 replica at the end of the test.
	defer func() {
		scale, err := kClient.AppsV1().Deployments("monitoring").GetScale(context.Background(), "kube-state-metrics", metav1.GetOptions{})
		if err != nil {
			t.Fatal(err)
		}

		scale.Spec.Replicas = 1
		_, err = kClient.AppsV1().Deployments("monitoring").UpdateScale(context.Background(), "kube-state-metrics", scale, metav1.UpdateOptions{})
		if err != nil {
			t.Fatal(err)
		}
	}()

	// Wait for the 2 replicas of kube-state-metrics to be successfully scraped.
	err = wait.Poll(5*time.Second, 1*time.Minute, func() (bool, error) {
		n, err := promClient.query(`up{job="kube-state-metrics"} == 1`)
		if err != nil {
			return false, err
		}

		if n != 2 {
			t.Logf("expecting 2 kube-state-metrics targets, got %d", n)
			return false, nil
		}

		return true, nil
	})
	if err != nil {
		t.Fatal(err)
	}

	// Wait for all rule groups to be evaluated at least once without error.
	now := time.Now()
	err = wait.Poll(30*time.Second, 5*time.Minute, func() (bool, error) {
		rsp, err := promClient.apiRequest("/api/v1/rules", "type", "")
		if err != nil {
			return false, err
		}

		res, err := gabs.ParseJSON(rsp.Data)
		if err != nil {
			return false, err
		}

		groups, err := res.Path("groups").Children()
		if err != nil {
			return false, err
		}

		if len(groups) == 0 {
			return false, errors.New("got 0 rule groups")
		}

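		// For reference, the abridged shape of the /api/v1/rules response
		// that the traversal below relies on:
		//
		//	{"groups": [{
		//		"name": "...",
		//		"lastEvaluation": "<RFC3339Nano timestamp>",
		//		"rules": [{"health": "ok", ...}, ...]
		//	}, ...]}
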
		for _, group := range groups {
			groupName := group.Path("name").Data().(string)

			lastEvalString := group.Path("lastEvaluation").Data().(string)
			lastEval, err := time.Parse(time.RFC3339Nano, lastEvalString)
			if err != nil {
				return false, err
			}

			// Groups last evaluated before the test started don't count yet.
			if lastEval.Before(now) {
				t.Logf("%s not yet evaluated", groupName)
				return false, nil
			}

			rules, err := group.Path("rules").Children()
			if err != nil {
				return false, err
			}

			if len(rules) == 0 {
				return false, errors.Errorf("got 0 rules in group %s", groupName)
			}

			for _, rule := range rules {
				health := rule.Path("health").Data().(string)
				if health != "ok" {
					return false, errors.Errorf("error evaluating rule: %v", rule)
				}
			}
		}

		return true, nil
	})
	if err != nil {
		t.Fatal(err)
	}
}