Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions pkg/defaultmonitortests/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ import (
"github.com/openshift/origin/pkg/monitortests/testframework/additionaleventscollector"
"github.com/openshift/origin/pkg/monitortests/testframework/alertanalyzer"
"github.com/openshift/origin/pkg/monitortests/testframework/clusterinfoserializer"
"github.com/openshift/origin/pkg/monitortests/testframework/clusterinstancetypes"
"github.com/openshift/origin/pkg/monitortests/testframework/cpumetriccollector"
"github.com/openshift/origin/pkg/monitortests/testframework/disruptionexternalawscloudservicemonitoring"
"github.com/openshift/origin/pkg/monitortests/testframework/disruptionexternalazurecloudservicemonitoring"
Expand Down Expand Up @@ -208,6 +209,7 @@ func newUniversalMonitorTests(info monitortestframework.MonitorTestInitializatio
monitorTestRegistry.AddMonitorTestOrDie("interval-serializer", "Test Framework", intervalserializer.NewIntervalSerializer())
monitorTestRegistry.AddMonitorTestOrDie("tracked-resources-serializer", "Test Framework", trackedresourcesserializer.NewTrackedResourcesSerializer())
monitorTestRegistry.AddMonitorTestOrDie("cluster-info-serializer", "Test Framework", clusterinfoserializer.NewClusterInfoSerializer())
monitorTestRegistry.AddMonitorTestOrDie("cluster-instance-types", "Test Framework", clusterinstancetypes.NewClusterInstanceTypes())
monitorTestRegistry.AddMonitorTestOrDie("additional-events-collector", "Test Framework", additionaleventscollector.NewIntervalSerializer())
monitorTestRegistry.AddMonitorTestOrDie("known-image-checker", "Test Framework", knownimagechecker.NewEnsureValidImages())
monitorTestRegistry.AddMonitorTestOrDie("e2e-test-analyzer", "Test Framework", e2etestanalyzer.NewAnalyzer())
Expand Down
248 changes: 248 additions & 0 deletions pkg/monitortests/testframework/clusterinstancetypes/monitortest.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,248 @@
package clusterinstancetypes

import (
"context"
"encoding/json"
"fmt"
"path/filepath"
"sort"
"strings"
"time"

configv1 "github.com/openshift/api/config/v1"
machinev1beta1 "github.com/openshift/api/machine/v1beta1"
configclient "github.com/openshift/client-go/config/clientset/versioned"
machineclient "github.com/openshift/client-go/machine/clientset/versioned"
"github.com/openshift/origin/pkg/dataloader"
"github.com/openshift/origin/pkg/monitor/monitorapi"
"github.com/openshift/origin/pkg/monitortestframework"
"github.com/openshift/origin/pkg/test/ginkgo/junitapi"
"github.com/sirupsen/logrus"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/client-go/kubernetes"
"k8s.io/client-go/rest"
)

type clusterInstanceTypes struct {
adminRESTConfig *rest.Config
data []instanceTypeRow
}

type instanceTypeRow struct {
Platform string `json:"platform"`
Region string `json:"region"`
Role string `json:"role"`
InstanceType string `json:"instance_type"`

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

On AWS, we also support local and wavelength zones via edge compute pool.

Let's also collect zone details (for AWS only?). We can get it by inspecting node label topology.kubernetes.io/zone, for example, in this run:

topology.kubernetes.io/zone: us-west-2-pdx-1a

}

func NewClusterInstanceTypes() monitortestframework.MonitorTest {
return &clusterInstanceTypes{}
}

func (w *clusterInstanceTypes) PrepareCollection(ctx context.Context, adminRESTConfig *rest.Config, recorder monitorapi.RecorderWriter) error {
return nil
}

func (w *clusterInstanceTypes) StartCollection(ctx context.Context, adminRESTConfig *rest.Config, recorder monitorapi.RecorderWriter) error {
w.adminRESTConfig = adminRESTConfig
return nil
}

func (w *clusterInstanceTypes) CollectData(ctx context.Context, storageDir string, beginning, end time.Time) (monitorapi.Intervals, []*junitapi.JUnitTestCase, error) {
logger := logrus.WithField("MonitorTest", "ClusterInstanceTypes")

data, err := w.collect(ctx)
if err != nil {
logger.WithError(err).Warn("failed to collect instance type data")
return nil, nil, nil
}
Comment thread
coderabbitai[bot] marked this conversation as resolved.
w.data = data
return nil, nil, nil
}

func (*clusterInstanceTypes) ConstructComputedIntervals(ctx context.Context, startingIntervals monitorapi.Intervals, recordedResources monitorapi.ResourcesMap, beginning, end time.Time) (monitorapi.Intervals, error) {
return nil, nil
}

func (*clusterInstanceTypes) EvaluateTestsFromConstructedIntervals(ctx context.Context, finalIntervals monitorapi.Intervals) ([]*junitapi.JUnitTestCase, error) {
return nil, nil
}

func (w *clusterInstanceTypes) WriteContentToStorage(ctx context.Context, storageDir, timeSuffix string, finalIntervals monitorapi.Intervals, finalResourceState monitorapi.ResourcesMap) error {
if len(w.data) == 0 {
return nil
}

rows := make([]map[string]string, 0, len(w.data))
for _, r := range w.data {
rows = append(rows, map[string]string{
"Platform": r.Platform,
"Region": r.Region,
"Role": r.Role,
"InstanceType": r.InstanceType,
})
}

dataFile := dataloader.DataFile{
TableName: "cluster_instance_types",
Schema: map[string]dataloader.DataType{
"Platform": dataloader.DataTypeString,
"Region": dataloader.DataTypeString,
"Role": dataloader.DataTypeString,
"InstanceType": dataloader.DataTypeString,
},
Rows: rows,
}

fileName := filepath.Join(storageDir, fmt.Sprintf("cluster-instance-types%s-%s", timeSuffix, dataloader.AutoDataLoaderSuffix))
if err := dataloader.WriteDataFile(fileName, dataFile); err != nil {
return fmt.Errorf("failed to write instance types autodl: %w", err)
}

return nil
}

func (*clusterInstanceTypes) Cleanup(ctx context.Context) error {
return nil
}

func (w *clusterInstanceTypes) collect(ctx context.Context) ([]instanceTypeRow, error) {
configClient, err := configclient.NewForConfig(w.adminRESTConfig)
if err != nil {
return nil, fmt.Errorf("failed to create config client: %w", err)
}

infra, err := configClient.ConfigV1().Infrastructures().Get(ctx, "cluster", metav1.GetOptions{})
if err != nil {
return nil, fmt.Errorf("failed to get infrastructure: %w", err)
}

if infra.Status.PlatformStatus == nil {
logrus.Info("skipping instance type collection: platform status not set")
return nil, nil
}

platform := strings.ToLower(string(infra.Status.PlatformStatus.Type))
if platform != "aws" && platform != "azure" && platform != "gcp" {
logrus.WithField("platform", platform).Info("skipping instance type collection for unsupported platform")
return nil, nil
}
Comment thread
coderabbitai[bot] marked this conversation as resolved.

// Azure doesn't expose region in Infrastructure CR, so we always fall back to node labels
region := getRegionFromInfrastructure(infra)
if region == "" {
kubeClient, err := kubernetes.NewForConfig(w.adminRESTConfig)
if err != nil {
logrus.WithError(err).Warn("failed to create kube client for region fallback")
} else {
nodes, err := kubeClient.CoreV1().Nodes().List(ctx, metav1.ListOptions{})
if err != nil {
logrus.WithError(err).Warn("failed to list nodes for region fallback")
} else if len(nodes.Items) > 0 {
region = nodes.Items[0].Labels["topology.kubernetes.io/region"]
}
}
Comment thread
coderabbitai[bot] marked this conversation as resolved.
}

machineClientSet, err := machineclient.NewForConfig(w.adminRESTConfig)
if err != nil {
return nil, fmt.Errorf("failed to create machine client: %w", err)
}

machines, err := machineClientSet.MachineV1beta1().Machines("openshift-machine-api").List(ctx, metav1.ListOptions{})
if err != nil {
return nil, fmt.Errorf("failed to list machines: %w", err)
}
Comment on lines +152 to +155

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we can get all the machine details from node labels. For example, we can extract the arch, instance type and region for all 3 aws, azure and gcp platforms👇:

kubernetes.io/arch: <arch>
node.kubernetes.io/instance-type: <type>
topology.kubernetes.io/region: <region>

For control plane nodes, they will have these labels:

node-role.kubernetes.io/control-plane: ""
node-role.kubernetes.io/master: ""

For worker nodes, they will have:

node-role.kubernetes.io/worker: ""

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My thought is that we likely need to switch to check ClusterAPI machines in a near future. So if we can inspect nodes, it'd ease our maintenance. WDYT?


return buildRows(platform, region, machines.Items), nil
}

func buildRows(platform, region string, machines []machinev1beta1.Machine) []instanceTypeRow {
seen := map[string]bool{}
var result []instanceTypeRow

for i := range machines {
machine := &machines[i]
role := "worker"
if isMaster(machine) {
role = "control-plane"
}
instanceType := extractInstanceType(platform, machine)
if instanceType == "" {
continue
}
key := role + "/" + instanceType
if seen[key] {
continue
}
seen[key] = true
result = append(result, instanceTypeRow{
Platform: platform,
Region: region,
Role: role,
InstanceType: instanceType,
})
}

sort.Slice(result, func(i, j int) bool {
if result[i].Role != result[j].Role {
return result[i].Role < result[j].Role
}
return result[i].InstanceType < result[j].InstanceType
})

return result
}

func getRegionFromInfrastructure(infra *configv1.Infrastructure) string {
if infra.Status.PlatformStatus == nil {
return ""
}
switch infra.Status.PlatformStatus.Type {
case configv1.AWSPlatformType:
if infra.Status.PlatformStatus.AWS != nil {
return infra.Status.PlatformStatus.AWS.Region
}
case configv1.GCPPlatformType:
if infra.Status.PlatformStatus.GCP != nil {
return infra.Status.PlatformStatus.GCP.Region
}
}
Comment thread
coderabbitai[bot] marked this conversation as resolved.
return ""
}

func isMaster(machine *machinev1beta1.Machine) bool {
return machine.Labels["machine.openshift.io/cluster-api-machine-role"] == "master"
}

func extractInstanceType(platform string, machine *machinev1beta1.Machine) string {
if machine.Spec.ProviderSpec.Value == nil {
return ""
}
raw := machine.Spec.ProviderSpec.Value.Raw

switch platform {
case "aws":
var spec machinev1beta1.AWSMachineProviderConfig
if err := json.Unmarshal(raw, &spec); err != nil {
logrus.WithError(err).WithField("machine", machine.Name).Warn("failed to unmarshal AWS provider spec")
return ""
}
return spec.InstanceType
case "azure":
var spec machinev1beta1.AzureMachineProviderSpec
if err := json.Unmarshal(raw, &spec); err != nil {
logrus.WithError(err).WithField("machine", machine.Name).Warn("failed to unmarshal Azure provider spec")
return ""
}
return spec.VMSize
case "gcp":
var spec machinev1beta1.GCPMachineProviderSpec
if err := json.Unmarshal(raw, &spec); err != nil {
logrus.WithError(err).WithField("machine", machine.Name).Warn("failed to unmarshal GCP provider spec")
return ""
}
return spec.MachineType
}
return ""
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
package clusterinstancetypes

import (
"encoding/json"
"testing"

machinev1beta1 "github.com/openshift/api/machine/v1beta1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
)

func awsMachine(t *testing.T, name, role, instanceType string) machinev1beta1.Machine {
t.Helper()
providerSpec := machinev1beta1.AWSMachineProviderConfig{
InstanceType: instanceType,
}
raw, err := json.Marshal(providerSpec)
if err != nil {
t.Fatalf("failed to marshal provider spec: %v", err)
}
return machinev1beta1.Machine{
ObjectMeta: metav1.ObjectMeta{
Name: name,
Labels: map[string]string{"machine.openshift.io/cluster-api-machine-role": role},
},
Spec: machinev1beta1.MachineSpec{
ProviderSpec: machinev1beta1.ProviderSpec{
Value: &runtime.RawExtension{Raw: raw},
},
},
}
}

func TestBuildRowsDeduplicates(t *testing.T) {
machines := []machinev1beta1.Machine{
awsMachine(t, "master-0", "master", "m6i.xlarge"),
awsMachine(t, "master-1", "master", "m6i.xlarge"),
awsMachine(t, "master-2", "master", "m6i.xlarge"),
awsMachine(t, "worker-0", "worker", "m6i.2xlarge"),
awsMachine(t, "worker-1", "worker", "m6i.2xlarge"),
awsMachine(t, "worker-2", "worker", "m6i.2xlarge"),
}

rows := buildRows("aws", "us-east-1", machines)

if len(rows) != 2 {
t.Fatalf("expected 2 deduplicated rows, got %d: %+v", len(rows), rows)
}
if rows[0].Role != "control-plane" || rows[0].InstanceType != "m6i.xlarge" {
t.Errorf("unexpected control-plane row: %+v", rows[0])
}
if rows[1].Role != "worker" || rows[1].InstanceType != "m6i.2xlarge" {
t.Errorf("unexpected worker row: %+v", rows[1])
}
}

func TestBuildRowsMixedWorkerTypes(t *testing.T) {
machines := []machinev1beta1.Machine{
awsMachine(t, "master-0", "master", "m6i.xlarge"),
awsMachine(t, "worker-0", "worker", "m5.xlarge"),
awsMachine(t, "worker-1", "worker", "m6i.2xlarge"),
awsMachine(t, "worker-2", "worker", "m5.xlarge"),
}

rows := buildRows("aws", "us-east-1", machines)

if len(rows) != 3 {
t.Fatalf("expected 3 rows (1 cp + 2 distinct worker types), got %d: %+v", len(rows), rows)
}
if rows[0].Role != "control-plane" {
t.Errorf("first row should be control-plane, got %+v", rows[0])
}
workerTypes := map[string]bool{}
for _, r := range rows[1:] {
if r.Role != "worker" {
t.Errorf("expected worker role, got %+v", r)
}
workerTypes[r.InstanceType] = true
}
if !workerTypes["m5.xlarge"] || !workerTypes["m6i.2xlarge"] {
t.Errorf("expected both worker types present, got %v", workerTypes)
}
}

func TestBuildRowsSortsControlPlaneFirst(t *testing.T) {
machines := []machinev1beta1.Machine{
awsMachine(t, "worker-0", "worker", "m5.xlarge"),
awsMachine(t, "master-0", "master", "m6i.xlarge"),
}

rows := buildRows("aws", "us-east-1", machines)

if len(rows) != 2 {
t.Fatalf("expected 2 rows, got %d", len(rows))
}
if rows[0].Role != "control-plane" {
t.Errorf("control-plane should sort first, got %+v", rows[0])
}
}

func TestBuildRowsPropagatesPlatformAndRegion(t *testing.T) {
machines := []machinev1beta1.Machine{
awsMachine(t, "master-0", "master", "m6i.xlarge"),
}

rows := buildRows("aws", "eu-west-1", machines)

if rows[0].Platform != "aws" || rows[0].Region != "eu-west-1" {
t.Errorf("expected platform=aws region=eu-west-1, got %+v", rows[0])
}
}

func TestBuildRowsSkipsEmptyProviderSpec(t *testing.T) {
machines := []machinev1beta1.Machine{
awsMachine(t, "master-0", "master", "m6i.xlarge"),
{
ObjectMeta: metav1.ObjectMeta{
Name: "worker-no-spec",
Labels: map[string]string{"machine.openshift.io/cluster-api-machine-role": "worker"},
},
},
}

rows := buildRows("aws", "us-east-1", machines)

if len(rows) != 1 {
t.Fatalf("expected 1 row (worker with no spec skipped), got %d: %+v", len(rows), rows)
}
}