Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/monitoring.md
Original file line number Diff line number Diff line change
Expand Up @@ -235,6 +235,7 @@ These metrics are exposed by Celeborn worker.
| CommitFilesFailCount | The count of failed commit files requests in the current worker. |
| SlotsAllocated | The number of slots allocated in the last hour. |
| ActiveSlotsCount | The number of slots currently being used in a worker. |
| AvailableSlotsCount | The number of slots currently available in a worker. |
| ReserveSlotsTime | ReserveSlots means acquiring a disk buffer and recording the partition location. |
| ActiveConnectionCount | The count of active network connections. |
| NettyMemory | The total amount of off-heap memory used by the Celeborn worker. |
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -465,6 +465,9 @@ private[celeborn] class Worker(
// Registers a gauge under WorkerSource.ACTIVE_SLOTS_COUNT whose supplier
// returns workerInfo.usedSlots().
workerSource.addGauge(WorkerSource.ACTIVE_SLOTS_COUNT) { () =>
workerInfo.usedSlots()
}
// Registers a gauge under WorkerSource.AVAILABLE_SLOTS_COUNT whose supplier
// returns workerInfo.totalAvailableSlots().
// NOTE(review): when the supplier is evaluated (per scrape vs. cached) depends on
// addGauge, which is not visible here — confirm before relying on freshness.
workerSource.addGauge(WorkerSource.AVAILABLE_SLOTS_COUNT) { () =>
workerInfo.totalAvailableSlots()
}
workerSource.addGauge(WorkerSource.IS_DECOMMISSIONING_WORKER) { () =>
if (shutdown.get() && (workerStatusManager.currentWorkerStatus.getState == State.InDecommission ||
workerStatusManager.currentWorkerStatus.getState == State.InDecommissionThenIdle)) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -238,6 +238,7 @@ object WorkerSource {
// slots
// Metric-name constants for slot-related worker metrics. The string values are
// the names listed in docs/monitoring.md, so they must stay in sync with that table.
// Slots allocated in the last hour.
val SLOTS_ALLOCATED = "SlotsAllocated"
// Number of slots currently being used in a worker.
val ACTIVE_SLOTS_COUNT = "ActiveSlotsCount"
// Number of slots currently available in a worker.
val AVAILABLE_SLOTS_COUNT = "AvailableSlotsCount"
// ReserveSlots: acquiring a disk buffer and recording the partition location.
val RESERVE_SLOTS_TIME = "ReserveSlotsTime"

// connection
Expand Down
Loading