Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 42 additions & 1 deletion src/control/cmd/dmg/storage.go
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
//
// (C) Copyright 2019-2023 Intel Corporation.
// (C) Copyright 2025 Hewlett Packard Enterprise Development LP
// (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP
//
// SPDX-License-Identifier: BSD-2-Clause-Patent
//

package main

import (
"context"
"strings"

"github.com/pkg/errors"
Expand Down Expand Up @@ -121,16 +122,56 @@ func (cmd *storageFormatCmd) Execute(args []string) (err error) {

resp, err := control.StorageFormat(ctx, cmd.ctlInvoker, req)
if err != nil {
// If replace operation failed, wipe storage to prevent partial formatting.
if cmd.Replace {
cmd.Errorf("Storage format with --replace failed: %v", err)
cmd.Info("Attempting to erase partially formatted storage...")
if eraseErr := cmd.eraseStorageOnFailure(ctx); eraseErr != nil {
cmd.Errorf("Failed to erase storage after format failure: %v", eraseErr)
return errors.Wrap(err, "storage format failed and cleanup also failed")
}
cmd.Info("Storage successfully erased after format failure")
}
return err
}

// Check if there were any format errors in the response for replace operation.
if cmd.Replace && resp.Errors() != nil {
cmd.Errorf("Storage format with --replace encountered errors")
cmd.Info("Attempting to erase partially formatted storage...")
if eraseErr := cmd.eraseStorageOnFailure(ctx); eraseErr != nil {
cmd.Errorf("Failed to erase storage after format failure: %v", eraseErr)
// Still return the original format error.
} else {
cmd.Info("Storage successfully erased after format failure")
}
}

if cmd.JSONOutputEnabled() {
return cmd.OutputJSON(resp, resp.Errors())
}

return cmd.printFormatResp(resp)
}

// eraseStorageOnFailure attempts to wipe storage using the SystemErase API when format fails.
// It is intended to keep partially formatted storage from leaving the system in an
// inconsistent state.
//
// It returns nil when the erase call succeeds and the response reports no errors.
// Otherwise it returns the error from the SystemErase call, or the errors reported in
// the erase response, wrapped with context.
//
// NOTE(review): the request is built with no fields set, so nothing in this function
// restricts the erase to the ranks or hosts whose format failed -- confirm that this
// scope is intended before relying on it for a --replace cleanup.
func (cmd *storageFormatCmd) eraseStorageOnFailure(ctx context.Context) error {
cmd.Debugf("Invoking SystemErase to clean up after failed format operation")

// Empty request: no target selection is specified here.
eraseReq := &control.SystemEraseReq{}
eraseResp, err := control.SystemErase(ctx, cmd.ctlInvoker, eraseReq)
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think this will work... SystemErase doesn't allow you to choose ranks or nodes.

I think you'll need to handle this from the daos_server that owns the engine. If the engine fails to join, and it's a replace operation, blow the storage away. The failure that triggered this request was happening at the join stage.

If the format itself fails, I don't think there's any risk of the engine coming up. If there's a partial failure, it's not a bad idea to clean up, but I think that would have to happen from the server side, too.

// The erase call itself failed.
if err != nil {
return errors.Wrap(err, "system erase")
}

// The call returned, but the response carries errors; surface them to the caller.
if eraseResp.Errors() != nil {
return errors.Wrap(eraseResp.Errors(), "system erase reported errors")
}

return nil
}

func (cmd *storageFormatCmd) printFormatResp(resp *control.StorageFormatResp) error {
var outErr strings.Builder
if err := pretty.PrintResponseErrors(resp, &outErr); err != nil {
Expand Down
Loading