diff --git a/README.md b/README.md index 79276d2..b6dd4f6 100644 --- a/README.md +++ b/README.md @@ -17,6 +17,7 @@ CheetahString is a versatile string type that goes beyond the standard library's - Efficient Arc-based sharing for larger strings - **🔧 Rich API** + - Type split: `CheetahStr` for immutable clone-cheap keys and `CheetahBuilder` for append-heavy construction - Query methods: `starts_with`, `ends_with`, `contains`, `find`, `rfind` - Transformation: `to_uppercase`, `to_lowercase`, `replace`, `trim` - Iteration: `split`, `lines`, `chars` @@ -66,7 +67,7 @@ Available features: ## 🚀 Quick Start ```rust -use cheetah_string::CheetahString; +use cheetah_string::{CheetahBuilder, CheetahStr, CheetahString}; // Create from various sources let s1 = CheetahString::from("hello"); // From &str @@ -90,16 +91,23 @@ let name = CheetahString::from(" Rust"); let message = greeting + name.as_str(); // "Hello Rust" // Builder pattern for efficient construction -let mut builder = CheetahString::with_capacity(100); -builder.push_str("Hello"); -builder.push_str(", "); -builder.push_str("World!"); +let mut string_builder = CheetahString::with_capacity(100); +string_builder.push_str("Hello"); +string_builder.push_str(", "); +string_builder.push_str("World!"); // Explicit String storage policy let mut owned = CheetahString::from_string_owned(String::with_capacity(128)); owned.push_str("capacity-preserving"); let shared = CheetahString::from_string_shared("clone-cheap".repeat(16)); +// v2 type split +let topic = CheetahStr::from_static_str("orders-created"); +let mut route_builder = CheetahBuilder::with_capacity(64); +route_builder.push_str(topic.as_str()); +route_builder.push_str(":partition-0"); +let route_key = route_builder.finish_str(); + // Safe UTF-8 validation let bytes = b"hello"; let s = CheetahString::try_from_bytes(bytes).unwrap(); @@ -135,6 +143,16 @@ CheetahString intelligently chooses the most efficient storage: | Owned | `String` | 1 | Reserved capacity, repeated 
mutation | | Bytes | `CheetahBytes` | 1 | Byte-oriented network buffers (with feature) | +For new code, use: + +| Type | Role | +|------|------| +| `CheetahStr` | Immutable clone-cheap values such as topics, groups, names, and keys | +| `CheetahString` | Mutable string value with the 1.x compatibility API | +| `CheetahBuilder` | Append-heavy construction followed by `finish_string()` or `finish_str()` | +| `CheetahFinder` | Reusable substring search | +| `CheetahBytes` | Byte semantics without a UTF-8 promise | + ## 🔧 API Overview ### Construction @@ -145,6 +163,8 @@ CheetahString intelligently chooses the most efficient storage: - `from_string_owned(s)` - Preserve `String` ownership and spare capacity for mutation - `from_string_shared(s)` - Convert long owned strings to clone-cheap shared storage - `try_from_bytes(b)` - Safe construction from bytes with UTF-8 validation +- `CheetahStr` - Immutable clone-cheap string companion +- `CheetahBuilder` - Append-heavy builder companion - `CheetahBytes` - Byte-oriented companion type available with the `bytes` feature - `with_capacity(n)` - Pre-allocate capacity diff --git a/src/builder.rs b/src/builder.rs new file mode 100644 index 0000000..f66f5b1 --- /dev/null +++ b/src/builder.rs @@ -0,0 +1,146 @@ +use alloc::string::String; +use core::fmt; + +use crate::{CheetahStr, CheetahString}; + +/// Append-heavy builder for constructing Cheetah string values. +/// +/// `CheetahBuilder` keeps mutable construction separate from immutable +/// clone-cheap `CheetahStr` values and stable string values. +#[derive(Clone, Default)] +pub struct CheetahBuilder { + inner: String, +} + +impl CheetahBuilder { + /// Creates an empty builder. + #[inline] + pub fn new() -> Self { + Self { + inner: String::new(), + } + } + + /// Creates an empty builder with at least `capacity` bytes. 
+ #[inline] + pub fn with_capacity(capacity: usize) -> Self { + Self { + inner: String::with_capacity(capacity), + } + } + + /// Creates a builder from existing owned storage. + #[inline] + pub fn from_string(value: String) -> Self { + Self { inner: value } + } + + /// Appends a string slice. + #[inline] + pub fn push_str(&mut self, value: &str) { + self.inner.push_str(value); + } + + /// Appends a character. + #[inline] + pub fn push(&mut self, value: char) { + self.inner.push(value); + } + + /// Reserves capacity for at least `additional` more bytes. + #[inline] + pub fn reserve(&mut self, additional: usize) { + self.inner.reserve(additional); + } + + /// Clears the current contents while preserving capacity. + #[inline] + pub fn clear(&mut self) { + self.inner.clear(); + } + + /// Returns the current contents. + #[inline] + pub fn as_str(&self) -> &str { + self.inner.as_str() + } + + /// Returns the current length in bytes. + #[inline] + pub fn len(&self) -> usize { + self.inner.len() + } + + /// Returns whether the builder is empty. + #[inline] + pub fn is_empty(&self) -> bool { + self.inner.is_empty() + } + + /// Returns the allocated capacity in bytes. + #[inline] + pub fn capacity(&self) -> usize { + self.inner.capacity() + } + + /// Finishes into a mutable string value, preserving spare capacity when it + /// is useful for subsequent mutation. + #[inline] + pub fn finish_string(self) -> CheetahString { + CheetahString::from_string_owned(self.inner) + } + + /// Finishes into an immutable clone-cheap string value. + #[inline] + pub fn finish_str(self) -> CheetahStr { + CheetahStr::from_string(self.inner) + } + + /// Returns the owned `String` backing this builder. 
+ #[inline] + pub fn into_string(self) -> String { + self.inner + } +} + +impl From<String> for CheetahBuilder { + #[inline] + fn from(value: String) -> Self { + Self::from_string(value) + } +} + +impl From<&str> for CheetahBuilder { + #[inline] + fn from(value: &str) -> Self { + let mut builder = Self::with_capacity(value.len()); + builder.push_str(value); + builder + } +} + +impl Extend<char> for CheetahBuilder { + #[inline] + fn extend<T: IntoIterator<Item = char>>(&mut self, iter: T) { + self.inner.extend(iter); + } +} + +impl<'a> Extend<&'a str> for CheetahBuilder { + #[inline] + fn extend<T: IntoIterator<Item = &'a str>>(&mut self, iter: T) { + for item in iter { + self.push_str(item); + } + } +} + +impl fmt::Debug for CheetahBuilder { + #[inline] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("CheetahBuilder") + .field("value", &self.inner) + .field("capacity", &self.inner.capacity()) + .finish() + } +} diff --git a/src/cheetah_str.rs b/src/cheetah_str.rs new file mode 100644 index 0000000..2d6020b --- /dev/null +++ b/src/cheetah_str.rs @@ -0,0 +1,286 @@ +use alloc::borrow::Cow; +use alloc::string::{ParseError, String, ToString}; +use alloc::sync::Arc; +use core::borrow::Borrow; +use core::cmp::Ordering; +use core::fmt; +use core::hash::{Hash, Hasher}; +use core::ops::Deref; +use core::str::{self, FromStr}; + +use crate::CheetahString; + +const INLINE_CAPACITY: usize = 23; + +/// Immutable, clone-cheap string value for key/name/topic style workloads. +/// +/// `CheetahStr` intentionally has no mutation API. Use [`CheetahBuilder`] or +/// [`CheetahString`] when the value is still being constructed. +/// +/// [`CheetahBuilder`]: crate::CheetahBuilder +#[derive(Clone)] +pub struct CheetahStr { + inner: Repr, +} + +#[derive(Clone)] +enum Repr { + Inline { + len: u8, + data: [u8; INLINE_CAPACITY], + }, + Static(&'static str), + Shared(Arc<str>), +} + +impl CheetahStr { + /// Creates an empty immutable string. 
+ #[inline] + pub const fn empty() -> Self { + Self { + inner: Repr::Inline { + len: 0, + data: [0; INLINE_CAPACITY], + }, + } + } + + /// Creates an empty immutable string. + #[inline] + pub fn new() -> Self { + Self::empty() + } + + /// Creates a zero-copy immutable string from a static string slice. + #[inline] + pub const fn from_static_str(s: &'static str) -> Self { + Self { + inner: Repr::Static(s), + } + } + + /// Creates an immutable string from a borrowed string slice. + #[inline] + pub fn from_slice(s: &str) -> Self { + if s.len() <= INLINE_CAPACITY { + let mut data = [0u8; INLINE_CAPACITY]; + data[..s.len()].copy_from_slice(s.as_bytes()); + Self { + inner: Repr::Inline { + len: s.len() as u8, + data, + }, + } + } else { + Self { + inner: Repr::Shared(Arc::from(s)), + } + } + } + + /// Creates an immutable string from owned storage. + #[inline] + pub fn from_string(s: String) -> Self { + if s.len() <= INLINE_CAPACITY { + Self::from_slice(&s) + } else { + Self { + inner: Repr::Shared(s.into_boxed_str().into()), + } + } + } + + /// Returns the string slice. + #[inline] + pub fn as_str(&self) -> &str { + match &self.inner { + Repr::Inline { len, data } => { + // SAFETY: Inline data is copied only from valid UTF-8 strings. + unsafe { str::from_utf8_unchecked(&data[..*len as usize]) } + } + Repr::Static(s) => s, + Repr::Shared(s) => s.as_ref(), + } + } + + /// Returns the UTF-8 bytes. + #[inline] + pub fn as_bytes(&self) -> &[u8] { + self.as_str().as_bytes() + } + + /// Returns the byte length. + #[inline] + pub fn len(&self) -> usize { + self.as_str().len() + } + + /// Returns whether this string is empty. 
+ #[inline] + pub fn is_empty(&self) -> bool { + self.as_str().is_empty() + } +} + +impl Default for CheetahStr { + #[inline] + fn default() -> Self { + Self::empty() + } +} + +impl From<&str> for CheetahStr { + #[inline] + fn from(value: &str) -> Self { + Self::from_slice(value) + } +} + +impl From<String> for CheetahStr { + #[inline] + fn from(value: String) -> Self { + Self::from_string(value) + } +} + +impl From<Cow<'static, str>> for CheetahStr { + #[inline] + fn from(value: Cow<'static, str>) -> Self { + match value { + Cow::Borrowed(s) => Self::from_static_str(s), + Cow::Owned(s) => Self::from_string(s), + } + } +} + +impl From<&CheetahString> for CheetahStr { + #[inline] + fn from(value: &CheetahString) -> Self { + Self::from_slice(value.as_str()) + } +} + +impl From<CheetahString> for CheetahStr { + #[inline] + fn from(value: CheetahString) -> Self { + Self::from_string(String::from(value)) + } +} + +impl From<CheetahStr> for String { + #[inline] + fn from(value: CheetahStr) -> Self { + value.as_str().to_string() + } +} + +impl FromStr for CheetahStr { + type Err = ParseError; + + #[inline] + fn from_str(s: &str) -> Result<Self, Self::Err> { + Ok(Self::from_slice(s)) + } +} + +impl Deref for CheetahStr { + type Target = str; + + #[inline] + fn deref(&self) -> &Self::Target { + self.as_str() + } +} + +impl AsRef<str> for CheetahStr { + #[inline] + fn as_ref(&self) -> &str { + self.as_str() + } +} + +impl AsRef<[u8]> for CheetahStr { + #[inline] + fn as_ref(&self) -> &[u8] { + self.as_bytes() + } +} + +impl Borrow<str> for CheetahStr { + #[inline] + fn borrow(&self) -> &str { + self.as_str() + } +} + +impl fmt::Display for CheetahStr { + #[inline] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + self.as_str().fmt(f) + } +} + +impl fmt::Debug for CheetahStr { + #[inline] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::Debug::fmt(self.as_str(), f) + } +} + +impl Hash for CheetahStr { + #[inline] + fn hash<H: Hasher>(&self, state: &mut H) { + self.as_str().hash(state); + } +} + +impl PartialEq for CheetahStr { + 
#[inline] + fn eq(&self, other: &Self) -> bool { + self.as_str() == other.as_str() + } +} + +impl PartialEq<str> for CheetahStr { + #[inline] + fn eq(&self, other: &str) -> bool { + self.as_str() == other + } +} + +impl PartialEq<&str> for CheetahStr { + #[inline] + fn eq(&self, other: &&str) -> bool { + self.as_str() == *other + } +} + +impl PartialEq<CheetahStr> for str { + #[inline] + fn eq(&self, other: &CheetahStr) -> bool { + self == other.as_str() + } +} + +impl PartialEq<CheetahStr> for &str { + #[inline] + fn eq(&self, other: &CheetahStr) -> bool { + *self == other.as_str() + } +} + +impl Eq for CheetahStr {} + +impl PartialOrd for CheetahStr { + #[inline] + fn partial_cmp(&self, other: &Self) -> Option<Ordering> { + Some(self.cmp(other)) + } +} + +impl Ord for CheetahStr { + #[inline] + fn cmp(&self, other: &Self) -> Ordering { + self.as_str().cmp(other.as_str()) + } +} diff --git a/src/lib.rs b/src/lib.rs index 96dc635..41e45a7 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -2,6 +2,8 @@ //! No more relying solely on the standard library's String! CheetahString is a versatile string type that can store static and dynamic strings. //! It is usable in both `std` and `no_std` environments. Additionally, CheetahString supports serde for serialization and deserialization. +//! `CheetahStr` is available for immutable clone-cheap string values, and +//! `CheetahBuilder` is available for append-heavy construction. //! The `bytes` feature exposes `CheetahBytes` for byte-oriented data. //! It minimizes allocations across small, shared, and builder-oriented string workloads. //! The `from_string_owned` and `from_string_shared` constructors make owned @@ -60,6 +62,8 @@ //! 
extern crate alloc; +mod builder; +mod cheetah_str; mod cheetah_string; mod error; mod search; @@ -80,6 +84,8 @@ pub mod packed; #[cfg(feature = "bytes")] pub use cheetah_bytes::CheetahBytes; +pub use builder::CheetahBuilder; +pub use cheetah_str::CheetahStr; pub use cheetah_string::{CheetahString, SplitPattern, SplitStr, SplitWrapper, StrPattern}; pub use error::{Error, Result}; pub use search::CheetahFinder; diff --git a/tests/type_split.rs b/tests/type_split.rs new file mode 100644 index 0000000..616c157 --- /dev/null +++ b/tests/type_split.rs @@ -0,0 +1,54 @@ +use cheetah_string::{CheetahBuilder, CheetahStr, CheetahString}; +use std::collections::HashMap; + +#[test] +fn cheetah_str_keeps_long_clones_shared() { + let value = CheetahStr::from("topic.".repeat(32)); + let cloned = value.clone(); + + assert_eq!(value, cloned); + assert_eq!(value.as_bytes().as_ptr(), cloned.as_bytes().as_ptr()); +} + +#[test] +fn cheetah_str_works_as_hash_map_key() { + let mut routes = HashMap::new(); + routes.insert(CheetahStr::from_static_str("topic-a"), 7); + + assert_eq!(routes.get("topic-a"), Some(&7)); +} + +#[test] +fn builder_finishes_to_mutable_string_with_spare_capacity() { + let mut builder = CheetahBuilder::with_capacity(128); + builder.push_str("hello"); + let before = builder.as_str().as_bytes().as_ptr(); + + let mut value = builder.finish_string(); + value.push_str(" world"); + + assert_eq!(value, "hello world"); + assert_eq!(value.as_bytes().as_ptr(), before); +} + +#[test] +fn builder_finishes_to_clone_cheap_str() { + let mut builder = CheetahBuilder::new(); + builder.push_str(&"broker-".repeat(32)); + + let value = builder.finish_str(); + let cloned = value.clone(); + + assert_eq!(value, cloned); + assert_eq!(value.as_bytes().as_ptr(), cloned.as_bytes().as_ptr()); +} + +#[test] +fn cheetah_string_can_be_compacted_into_cheetah_str() { + let mut value = CheetahString::with_capacity(64); + value.push_str("consumer-group"); + + let compact = CheetahStr::from(value); + + 
assert_eq!(compact, "consumer-group"); +}