Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 13 additions & 2 deletions .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@ env:
CI: true

jobs:
build:
runs-on: ubuntu-latest
build:
runs-on: ${{ matrix.os }}

strategy:
matrix:
Expand Down Expand Up @@ -50,3 +50,14 @@ jobs:

- name: Test
run: cargo test --verbose --all-features

- name: Layout snapshot
run: cargo test layout_snapshot --all-features -- --nocapture

- name: Upload layout artifacts
if: always()
uses: actions/upload-artifact@v4
with:
name: layout-${{ matrix.os }}-${{ matrix.rust }}
path: target/layout-artifacts
if-no-files-found: ignore
110 changes: 110 additions & 0 deletions .github/workflows/release.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
name: Release

on:
push:
tags:
- "v*.*.*"
workflow_dispatch:
inputs:
version:
description: "Version to release, for example 1.1.0"
required: true
type: string

permissions:
contents: write

Comment on lines +1 to +16
env:
CARGO_TERM_COLOR: always
RUST_BACKTRACE: full

jobs:
publish:
runs-on: ubuntu-latest

steps:
- name: Checkout code
uses: actions/checkout@v4
with:
fetch-depth: 0

- name: Set up Rust
uses: dtolnay/rust-toolchain@stable
with:
toolchain: stable

# Determines the version being released, validates its shape, checks that it
# matches the Cargo.toml version, and exports `version` / `tag` step outputs.
- name: Resolve release version
  id: version
  shell: bash
  env:
    # Hand the manually supplied version to the script through the environment.
    # Interpolating `${{ inputs.version }}` straight into the script body would
    # let a crafted input be parsed as shell code before the validation below runs.
    INPUT_VERSION: ${{ inputs.version }}
  run: |
    if [[ "${{ github.event_name }}" == "workflow_dispatch" ]]; then
      VERSION="$INPUT_VERSION"
    else
      # Tag pushes: strip the leading "v" from the tag name.
      VERSION="${GITHUB_REF_NAME#v}"
    fi

    # Accept MAJOR.MINOR.PATCH with an optional pre-release/build suffix.
    if [[ ! "$VERSION" =~ ^[0-9]+\.[0-9]+\.[0-9]+([+-][0-9A-Za-z.-]+)?$ ]]; then
      echo "Invalid release version: $VERSION" >&2
      exit 1
    fi

    # Refuse to release when the manifest and the requested version disagree.
    MANIFEST_VERSION="$(cargo metadata --no-deps --format-version 1 | python3 -c 'import json, sys; print(json.load(sys.stdin)["packages"][0]["version"])')"
    if [[ "$MANIFEST_VERSION" != "$VERSION" ]]; then
      echo "Cargo.toml version $MANIFEST_VERSION does not match release version $VERSION" >&2
      exit 1
    fi

    echo "version=$VERSION" >> "$GITHUB_OUTPUT"
    echo "tag=v$VERSION" >> "$GITHUB_OUTPUT"

- name: Create tag for manual release
if: github.event_name == 'workflow_dispatch'
shell: bash
run: |
TAG="${{ steps.version.outputs.tag }}"
CURRENT_SHA="$(git rev-parse HEAD)"

if git rev-parse "$TAG" >/dev/null 2>&1; then
TAG_SHA="$(git rev-list -n 1 "$TAG")"
if [[ "$TAG_SHA" != "$CURRENT_SHA" ]]; then
echo "Tag $TAG already exists at $TAG_SHA, not current HEAD $CURRENT_SHA" >&2
exit 1
fi
else
git config user.name "github-actions[bot]"
git config user.email "41898282+github-actions[bot]@users.noreply.github.com"
git tag "$TAG"
git push origin "$TAG"
fi

- name: Check formatting
run: cargo fmt -- --check

- name: Lint
run: cargo clippy --all-features -- -D warnings

- name: Test all features
run: cargo test --all-features

- name: Test no-default feature matrix
run: cargo test --no-default-features --features serde,bytes,simd

- name: Package crate
run: cargo package

- name: Publish crate to crates.io
run: cargo publish --token "$CARGO_REGISTRY_TOKEN"
env:
CARGO_REGISTRY_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }}

- name: Create GitHub release
env:
GH_TOKEN: ${{ github.token }}
TAG: ${{ steps.version.outputs.tag }}
VERSION: ${{ steps.version.outputs.version }}
run: |
gh release create "$TAG" \
"target/package/cheetah-string-$VERSION.crate#cheetah-string-$VERSION.crate" \
--verify-tag \
--title "cheetah-string $TAG" \
--generate-notes
38 changes: 33 additions & 5 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "cheetah-string"
version = "1.0.1"
version = "1.1.0"
authors = ["mxsm <mxsm@apache.org>"]
Comment on lines 1 to 4
edition = "2021"
homepage = "https://github.com/mxsm/cheetah-string"
Expand All @@ -15,18 +15,22 @@ A lightweight, high-performance string manipulation library optimized for speed-
"""

[dependencies]
bytes = "1.10.0"
bytes = { version = "1.10.0", optional = true, default-features = false }
memchr = { version = "2", default-features = false }
serde = { version = "1.0", optional = true, default-features = false, features = ["alloc"] }

[features]
default = ["std"]
std = []
serde = ["serde/alloc"]
bytes = []
std = ["memchr/std"]
serde = ["dep:serde", "serde/alloc"]
bytes = ["dep:bytes"]
simd = []

[dev-dependencies]
compact_str = "0.8"
criterion = { version = "0.5", features = ["html_reports"] }
serde_json = "1.0"
smartstring = "1.0"


[[bench]]
Expand All @@ -37,6 +41,30 @@ harness = false
name = "comprehensive"
harness = false

[[bench]]
name = "layout"
harness = false

[[bench]]
name = "mutation"
harness = false

[[bench]]
name = "mq_properties"
harness = false

[[bench]]
name = "mq_remoting_header"
harness = false

[[bench]]
name = "mq_topic"
harness = false

[[bench]]
name = "pattern"
harness = false

[[bench]]
name = "simd"
harness = false
12 changes: 7 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,8 @@ CheetahString is a versatile string type that goes beyond the standard library's
- **⚡ Performance Focused**
- Optimized for common string operations
- Reduced memory allocations via intelligent internal representation
- Optional SIMD acceleration for string matching operations (x86_64 SSE2)
- `memchr`/`memmem` substring search by default
- Optional SIMD acceleration for selected byte comparisons (x86_64 SSE2)
- Benchmarked against standard library types

- **🛡️ Safe & Correct**
Expand All @@ -45,19 +46,19 @@ Add this to your `Cargo.toml`:

```toml
[dependencies]
cheetah-string = "1.0.0"
cheetah-string = "1.1.0"
```

### Optional Features

```toml
[dependencies]
cheetah-string = { version = "1.0.0", features = ["bytes", "serde", "simd"] }
cheetah-string = { version = "1.1.0", features = ["bytes", "serde", "simd"] }
```

Available features:
- `std` (default): Enable standard library support
- `bytes`: Integration with the `bytes` crate
- `bytes`: `CheetahBytes` and integration with the `bytes` crate
- `serde`: Serialization support via serde
- `simd`: SSE2-accelerated byte comparisons (prefix, suffix, and equality paths) on x86_64

Expand Down Expand Up @@ -106,7 +107,7 @@ CheetahString is designed with performance in mind:
- **Efficient Sharing**: Large immutable strings use `Arc<str>` for cheap cloning
- **Fast Builders**: Capacity-preserving builder paths use owned heap storage for direct mutation
- **Optimized Operations**: Common operations like concatenation have fast-path implementations
- **SIMD Acceleration** (with `simd` feature): String matching operations (`starts_with`, `ends_with`, `contains`, `find`, equality comparisons) are accelerated using SSE2 SIMD instructions on x86_64 platforms. The implementation automatically falls back to scalar code for small inputs or when SIMD is not available.
- **Search Acceleration**: Substring search uses `memchr`/`memmem` by default. With the `simd` feature, selected byte comparisons such as prefix, suffix, and equality paths can use SSE2 on x86_64 platforms.

Run benchmarks:
```bash
Expand Down Expand Up @@ -137,6 +138,7 @@ CheetahString intelligently chooses the most efficient storage:
- `from_static_str(s)` - Zero-cost wrapper for `'static str`
- `from_string(s)` - From owned `String`
- `try_from_bytes(b)` - Safe construction from bytes with UTF-8 validation
- `CheetahBytes` - Byte-oriented companion type available with the `bytes` feature
- `with_capacity(n)` - Pre-allocate capacity

### Query Methods
Expand Down
53 changes: 53 additions & 0 deletions bench-results/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# Benchmark Artifacts

This directory defines the artifact layout for performance-sensitive changes.
Generated benchmark output should be committed only when it is intentionally
used as review evidence for a release or PR.

Recommended layout:

```text
bench-results/
layout/
current.json
v1.1.json
v1.2.json
v2-packed.json
criterion/
before/
after/
mq/
topic.json
properties.json
remoting-header.json
summaries/
summary-v1.1-v1.2.md
summary-v1.2-v2-packed.md
```

Minimum metadata for generated JSON artifacts:

```json
{
"crate": "cheetah-string",
"version": "1.1.0",
"profile": "release",
"target": "x86_64-unknown-linux-gnu",
"rustc": "rustc 1.xx.x",
"os": "linux",
"cpu": "model name",
"bench": "layout"
}
```

For local capture, run:

```bash
scripts/bench-all.sh current
```

On Windows PowerShell:

```powershell
scripts/bench-all.ps1 current
```
8 changes: 4 additions & 4 deletions benches/comprehensive.rs
Original file line number Diff line number Diff line change
Expand Up @@ -362,13 +362,13 @@ fn bench_internal_hot_paths(c: &mut Criterion) {
});

let long_bytes = vec![b'a'; 256];
group.bench_function("CheetahString::from(Vec<u8> 256B)", |b| {
b.iter(|| black_box(CheetahString::from(long_bytes.clone())))
group.bench_function("CheetahString::try_from_vec(256B)", |b| {
b.iter(|| black_box(CheetahString::try_from_vec(long_bytes.clone()).unwrap()))
});

group.bench_function("String::from(CheetahString::from(Vec<u8> 256B))", |b| {
group.bench_function("String::from(CheetahString::try_from_vec(256B))", |b| {
b.iter(|| {
let value = CheetahString::from(long_bytes.clone());
let value = CheetahString::try_from_vec(long_bytes.clone()).unwrap();
black_box(String::from(value))
})
});
Expand Down
57 changes: 57 additions & 0 deletions benches/layout.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
use cheetah_string::CheetahString;
use std::env;
use std::fs;
use std::mem::{align_of, size_of};
use std::path::PathBuf;

/// Returns the directory that build artifacts should be written under.
///
/// Uses the value of the `CARGO_TARGET_DIR` environment variable when it is
/// set, and otherwise falls back to the relative path `target`.
fn target_dir() -> PathBuf {
    match env::var_os("CARGO_TARGET_DIR") {
        Some(configured) => PathBuf::from(configured),
        None => PathBuf::from("target"),
    }
}

/// Renders a single JSON object describing the memory layout of `T`.
///
/// The object has the form `{"type":<name>,"size":<bytes>,"align":<bytes>}`.
/// `name` is inserted verbatim, without JSON escaping, so callers must pass
/// labels that contain no quotes or backslashes.
fn layout_entry<T>(name: &str) -> String {
    let size = size_of::<T>();
    let align = align_of::<T>();
    format!(r#"{{"type":"{name}","size":{size},"align":{align}}}"#)
}

/// Captures a JSON snapshot of the size and alignment of `CheetahString` and
/// several standard string-like types.
///
/// The snapshot is written to `layout-artifacts/layout-bench.json` under the
/// directory returned by `target_dir()` and is also printed to stdout.
///
/// # Panics
///
/// Panics if the artifact directory cannot be created or the file cannot be
/// written.
fn main() {
    use std::sync::Arc;

    // Each type is paired with its `Option` wrapper so the snapshot records
    // the size of both forms side by side.
    let entries = [
        layout_entry::<CheetahString>("CheetahString"),
        layout_entry::<Option<CheetahString>>("Option<CheetahString>"),
        layout_entry::<String>("String"),
        layout_entry::<Option<String>>("Option<String>"),
        layout_entry::<&str>("&str"),
        layout_entry::<Option<&str>>("Option<&str>"),
        layout_entry::<Arc<str>>("Arc<str>"),
        layout_entry::<Option<Arc<str>>>("Option<Arc<str>>"),
    ];
    let joined_entries = entries.join(",\n ");

    // Pointer width in bits, derived from the size of `usize`.
    let pointer_width = std::mem::size_of::<usize>() * 8;

    // The document is assembled by hand; the "profile" field is a fixed label.
    let snapshot = format!(
        concat!(
            "{{\n",
            " \"crate\":\"cheetah-string\",\n",
            " \"profile\":\"bench\",\n",
            " \"target_arch\":\"{}\",\n",
            " \"target_os\":\"{}\",\n",
            " \"pointer_width\":\"{}\",\n",
            " \"layouts\":[\n {}\n ]\n",
            "}}\n"
        ),
        env::consts::ARCH,
        env::consts::OS,
        pointer_width,
        joined_entries
    );

    let artifact_dir = target_dir().join("layout-artifacts");
    let artifact_path = artifact_dir.join("layout-bench.json");
    fs::create_dir_all(&artifact_dir).expect("create layout artifact directory");
    fs::write(&artifact_path, &snapshot).expect("write layout bench artifact");

    println!("{}", snapshot);
}
Loading
Loading