-
Notifications
You must be signed in to change notification settings - Fork 1.2k
feat(swarm): expose ConnectionId and add conn duration metric
#3927
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
56b3fe3
d085266
fae6f20
832a897
feffc61
468bbe1
02d7ac7
9578315
fcbd309
8a773dc
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -18,20 +18,25 @@ | |
| // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER | ||
| // DEALINGS IN THE SOFTWARE. | ||
|
|
||
| use std::collections::HashMap; | ||
| use std::sync::{Arc, Mutex}; | ||
|
|
||
| use crate::protocol_stack; | ||
| use instant::Instant; | ||
| use libp2p_swarm::ConnectionId; | ||
| use prometheus_client::encoding::{EncodeLabelSet, EncodeLabelValue}; | ||
| use prometheus_client::metrics::counter::Counter; | ||
| use prometheus_client::metrics::family::Family; | ||
| use prometheus_client::metrics::histogram::{exponential_buckets, Histogram}; | ||
| use prometheus_client::registry::Registry; | ||
| use prometheus_client::registry::{Registry, Unit}; | ||
|
|
||
| pub(crate) struct Metrics { | ||
| connections_incoming: Family<AddressLabels, Counter>, | ||
| connections_incoming_error: Family<IncomingConnectionErrorLabels, Counter>, | ||
|
|
||
| connections_established: Family<ConnectionEstablishedLabels, Counter>, | ||
| connections_establishment_duration: Family<ConnectionEstablishmentDurationLabels, Histogram>, | ||
| connections_closed: Family<ConnectionClosedLabels, Counter>, | ||
| connections_established: Family<ConnectionLabels, Counter>, | ||
| connections_establishment_duration: Family<ConnectionLabels, Histogram>, | ||
| connections_duration: Family<ConnectionClosedLabels, Histogram>, | ||
|
|
||
| new_listen_addr: Family<AddressLabels, Counter>, | ||
| expired_listen_addr: Family<AddressLabels, Counter>, | ||
|
|
@@ -41,6 +46,8 @@ pub(crate) struct Metrics { | |
|
|
||
| dial_attempt: Counter, | ||
| outgoing_connection_error: Family<OutgoingConnectionErrorLabels, Counter>, | ||
|
|
||
| connections: Arc<Mutex<HashMap<ConnectionId, Instant>>>, | ||
| } | ||
|
|
||
| impl Metrics { | ||
|
|
@@ -110,34 +117,42 @@ impl Metrics { | |
| connections_established.clone(), | ||
| ); | ||
|
|
||
| let connections_closed = Family::default(); | ||
| let connections_establishment_duration = { | ||
| let constructor: fn() -> Histogram = | ||
| || Histogram::new(exponential_buckets(0.01, 1.5, 20)); | ||
| Family::new_with_constructor(constructor) | ||
| }; | ||
| sub_registry.register( | ||
| "connections_closed", | ||
| "Number of connections closed", | ||
| connections_closed.clone(), | ||
| "connections_establishment_duration", | ||
| "Time it took (locally) to establish connections", | ||
| connections_establishment_duration.clone(), | ||
| ); | ||
|
|
||
| let connections_establishment_duration = Family::new_with_constructor( | ||
| create_connection_establishment_duration_histogram as fn() -> Histogram, | ||
| ); | ||
| sub_registry.register( | ||
| let connections_duration = { | ||
| let constructor: fn() -> Histogram = | ||
| || Histogram::new(exponential_buckets(0.01, 3.0, 20)); | ||
| Family::new_with_constructor(constructor) | ||
| }; | ||
| sub_registry.register_with_unit( | ||
| "connections_establishment_duration", | ||
| "Time it took (locally) to establish connections", | ||
| Unit::Seconds, | ||
| connections_establishment_duration.clone(), | ||
| ); | ||
|
|
||
| Self { | ||
| connections_incoming, | ||
| connections_incoming_error, | ||
| connections_established, | ||
| connections_closed, | ||
| new_listen_addr, | ||
| expired_listen_addr, | ||
| listener_closed, | ||
| listener_error, | ||
| dial_attempt, | ||
| outgoing_connection_error, | ||
| connections_establishment_duration, | ||
| connections_duration, | ||
| connections: Default::default(), | ||
| } | ||
| } | ||
| } | ||
|
|
@@ -149,24 +164,44 @@ impl<TBvEv, THandleErr> super::Recorder<libp2p_swarm::SwarmEvent<TBvEv, THandleE | |
| libp2p_swarm::SwarmEvent::ConnectionEstablished { | ||
| endpoint, | ||
| established_in: time_taken, | ||
| connection_id, | ||
| .. | ||
| } => { | ||
| let labels = ConnectionEstablishedLabels { | ||
| let labels = ConnectionLabels { | ||
| role: endpoint.into(), | ||
| protocols: protocol_stack::as_string(endpoint.get_remote_address()), | ||
| }; | ||
| self.connections_established.get_or_create(&labels).inc(); | ||
| self.connections_establishment_duration | ||
| .get_or_create(&labels) | ||
| .observe(time_taken.as_secs_f64()); | ||
| self.connections | ||
| .lock() | ||
| .expect("lock not to be poisoned") | ||
| .insert(*connection_id, Instant::now()); | ||
| } | ||
| libp2p_swarm::SwarmEvent::ConnectionClosed { endpoint, .. } => { | ||
| self.connections_closed | ||
| .get_or_create(&ConnectionClosedLabels { | ||
| libp2p_swarm::SwarmEvent::ConnectionClosed { | ||
| endpoint, | ||
| connection_id, | ||
| cause, | ||
| .. | ||
| } => { | ||
| let labels = ConnectionClosedLabels { | ||
| connection: ConnectionLabels { | ||
| role: endpoint.into(), | ||
| protocols: protocol_stack::as_string(endpoint.get_remote_address()), | ||
| }) | ||
| .inc(); | ||
| }, | ||
| cause: cause.as_ref().expect("TODO remove see definition").into(), | ||
| }; | ||
| self.connections_duration.get_or_create(&labels).observe( | ||
| self.connections | ||
| .lock() | ||
| .expect("lock not to be poisoned") | ||
| .remove(connection_id) | ||
| .expect("closed connection to previously be established") | ||
| .elapsed() | ||
| .as_secs_f64(), | ||
| ); | ||
| } | ||
| libp2p_swarm::SwarmEvent::IncomingConnection { send_back_addr, .. } => { | ||
| self.connections_incoming | ||
|
|
@@ -187,7 +222,7 @@ impl<TBvEv, THandleErr> super::Recorder<libp2p_swarm::SwarmEvent<TBvEv, THandleE | |
| }) | ||
| .inc(); | ||
| } | ||
| libp2p_swarm::SwarmEvent::OutgoingConnectionError { error, peer_id } => { | ||
| libp2p_swarm::SwarmEvent::OutgoingConnectionError { error, peer_id, .. } => { | ||
| let peer = match peer_id { | ||
| Some(_) => PeerStatus::Known, | ||
| None => PeerStatus::Unknown, | ||
|
|
@@ -261,25 +296,42 @@ impl<TBvEv, THandleErr> super::Recorder<libp2p_swarm::SwarmEvent<TBvEv, THandleE | |
| libp2p_swarm::SwarmEvent::ListenerError { .. } => { | ||
| self.listener_error.inc(); | ||
| } | ||
| libp2p_swarm::SwarmEvent::Dialing(_) => { | ||
| libp2p_swarm::SwarmEvent::Dialing { .. } => { | ||
| self.dial_attempt.inc(); | ||
| } | ||
| } | ||
| } | ||
| } | ||
|
|
||
| #[derive(EncodeLabelSet, Hash, Clone, Eq, PartialEq, Debug)] | ||
| struct ConnectionEstablishedLabels { | ||
| struct ConnectionLabels { | ||
| role: Role, | ||
| protocols: String, | ||
| } | ||
|
|
||
| type ConnectionEstablishmentDurationLabels = ConnectionEstablishedLabels; | ||
|
|
||
| #[derive(EncodeLabelSet, Hash, Clone, Eq, PartialEq, Debug)] | ||
| struct ConnectionClosedLabels { | ||
| role: Role, | ||
| protocols: String, | ||
| // TODO: Should be Option<ConnectionError>. Needs https://github.com/prometheus/client_rust/pull/137 | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Do we wait for this before merging here?
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Oversight. Fixed with 9578315. Thanks for the catch, Thomas! |
||
| cause: ConnectionError, | ||
| #[prometheus(flatten)] | ||
| connection: ConnectionLabels, | ||
| } | ||
|
|
||
| #[derive(EncodeLabelValue, Hash, Clone, Eq, PartialEq, Debug)] | ||
| enum ConnectionError { | ||
| Io, | ||
| KeepAliveTimeout, | ||
| Handler, | ||
| } | ||
|
|
||
| impl<E> From<&libp2p_swarm::ConnectionError<E>> for ConnectionError { | ||
| fn from(value: &libp2p_swarm::ConnectionError<E>) -> Self { | ||
| match value { | ||
| libp2p_swarm::ConnectionError::IO(_) => ConnectionError::Io, | ||
| libp2p_swarm::ConnectionError::KeepAliveTimeout => ConnectionError::KeepAliveTimeout, | ||
| libp2p_swarm::ConnectionError::Handler(_) => ConnectionError::Handler, | ||
| } | ||
| } | ||
| } | ||
|
|
||
| #[derive(EncodeLabelSet, Hash, Clone, Eq, PartialEq, Debug)] | ||
|
|
@@ -359,7 +411,3 @@ impl From<&libp2p_swarm::ListenError> for IncomingConnectionError { | |
| } | ||
| } | ||
| } | ||
|
|
||
| fn create_connection_establishment_duration_histogram() -> Histogram { | ||
| Histogram::new(exponential_buckets(0.01, 1.5, 20)) | ||
| } | ||
Uh oh!
There was an error while loading. Please reload this page.