Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions CHANGELOG
Original file line number Diff line number Diff line change
@@ -1,3 +1,12 @@
- Unreleased
- Expose V8 ScriptCompiler::CachedData via Context#compile / MiniRacer::Script (#411)
- script = ctx.compile(src, filename:, cached_data:) → Script handle
- script.run replays compiled bytecode without re-parsing
- script.cached_data persists V8's per-script bytecode cache
- script.cache_rejected? reports source/version mismatches
- MiniRacer::V8_CACHED_DATA_VERSION_TAG for cache-key invalidation
- TruffleRuby shim falls back to source replay (no equivalent in GraalJS)

- 0.21.0 - 16-04-2026
- Add MiniRacer::Binary for returning Uint8Array to JavaScript from attached Ruby callbacks

Expand Down
58 changes: 58 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,64 @@ context.eval("bar()", filename: "a/bar.js")
# …
```

### Persistent bytecode cache for repeated script evaluation

`Context#compile` returns a `MiniRacer::Script` handle you can run multiple times,
and exposes V8's bytecode cache so you can skip re-parsing in subsequent processes:

```ruby
context = MiniRacer::Context.new
script = context.compile(File.read("bundle.js"), filename: "bundle.js")

# The first compile produces a cache blob you can persist to disk.
File.binwrite("bundle.js.cache", script.cached_data) if script.cached_data
script.run
```

In a later process, pass the saved blob back to skip the parse step:

```ruby
cached = File.binread("bundle.js.cache") # must be binary (ASCII-8BIT) encoding
context = MiniRacer::Context.new
script = context.compile(File.read("bundle.js"),
filename: "bundle.js",
cached_data: cached)

if script.cache_rejected?
# V8 source/version mismatch — save the regenerated blob.
File.binwrite("bundle.js.cache", script.cached_data)
end

script.run
```

When the supplied `cached_data` is accepted, `script.cached_data` returns `nil` so
callers can skip a redundant disk write. When V8 produces a fresh blob (initial
compile or rejection), it returns the new bytes.

`MiniRacer::V8_CACHED_DATA_VERSION_TAG` exposes V8's
`ScriptCompiler::CachedDataVersionTag()` — mix it into your cache key alongside
the source hash so a libv8-node version bump invalidates stale blobs automatically.
The constant is populated on first `Context.new` (after `Platform.set_flags!`),
so read it after constructing at least one Context.

```ruby
require "digest"
key = "#{Digest::SHA256.hexdigest(source)}-#{MiniRacer::V8_CACHED_DATA_VERSION_TAG}"
```

Notes:

- A `Script` is bound to the `Context` that compiled it; reusing it on another
Context isn't supported.
- `Script#dispose` frees the underlying V8 handle eagerly. The Ruby GC finalizer
does not (taking the V8 lock from a finalizer thread risks deadlock), so
long-lived Contexts with many short-lived scripts accumulate handles until
`Context#dispose` clears them.
- On TruffleRuby, `Script` is implemented as source replay (GraalJS has no
equivalent per-script bytecode cache reachable from `Polyglot::InnerContext`),
so `cached_data` is silently ignored and always returns `nil`, and
`MiniRacer::V8_CACHED_DATA_VERSION_TAG` is `0`.

### Fork Safety

Some Ruby web servers employ forking (for example unicorn or puma in clustered mode). V8 is not fork safe by default and sadly Ruby does not have support for fork notifications per [#5446](https://bugs.ruby-lang.org/issues/5446).
Expand Down
219 changes: 219 additions & 0 deletions ext/mini_racer_extension/mini_racer_extension.c
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,20 @@ typedef struct Snapshot {
VALUE blob;
} Snapshot;

// Native state behind a MiniRacer::Script object (created by Context#compile).
//
// GC-finalizer caveat: script_free cannot send a dispose RPC (would need
// to take rr_mtx without a reliable GVL guarantee). Handles freed here
// rely on State::~State() walking st.scripts at isolate teardown — so
// long-lived Contexts with many short-lived Scripts accumulate Persistents
// until the Context is disposed. Call Script#dispose explicitly to free
// eagerly.
typedef struct Script {
VALUE context; // parent Context VALUE (kept alive via script_mark)
VALUE cached_data; // cache blob stored by Context#compile: ASCII-8BIT String or Qnil
int32_t handle_id; // handle from the compile reply; 0 until Context#compile assigns it (Script#dispose does not reset it — check `disposed`)
int cache_rejected; // non-zero when the compile reply flagged the supplied cache as rejected
int disposed; // set to 1 by Script#dispose; Script#run refuses to run once set
} Script;

static void context_destroy(Context *c);
static void context_free(void *arg);
static void context_mark(void *arg);
Expand Down Expand Up @@ -181,6 +195,19 @@ static const rb_data_type_t snapshot_type = {
},
};

// GC hooks for MiniRacer::Script wrappers; the definitions live further down.
static void script_mark(void *arg);
static void script_free(void *arg);
static size_t script_size(const void *arg);

// TypedData descriptor that ties a wrapped Script struct to its
// mark / free / size callbacks.
static const rb_data_type_t script_type = {
    .wrap_struct_name = "mini_racer/script",
    .function = {
        .dmark = script_mark,
        .dfree = script_free,
        .dsize = script_size,
    },
};

static VALUE platform_init_error;
static VALUE context_disposed_error;
static VALUE parse_error;
Expand All @@ -192,10 +219,14 @@ static VALUE snapshot_error;
static VALUE terminated_error;
static VALUE context_class;
static VALUE snapshot_class;
static VALUE script_class;
static VALUE date_time_class;
static VALUE binary_class;
static VALUE js_function_class;

static ID id_filename;
static ID id_cached_data;

static pthread_mutex_t flags_mtx = PTHREAD_MUTEX_INITIALIZER;
static Buf flags; // protected by |flags_mtx|

Expand Down Expand Up @@ -804,9 +835,12 @@ static void dispatch1(Context *c, const uint8_t *p, size_t n)
switch (*p) {
case 'A': return v8_attach(c->pst, p+1, n-1);
case 'C': return v8_timedwait(c, p+1, n-1, v8_call);
case 'D': return v8_dispose_script(c->pst, p+1, n-1);
case 'E': return v8_timedwait(c, p+1, n-1, v8_eval);
case 'H': return v8_heap_snapshot(c->pst);
case 'K': return v8_timedwait(c, p+1, n-1, v8_compile); // (K)ompile — 'C' is taken
case 'P': return v8_pump_message_loop(c->pst);
case 'R': return v8_timedwait(c, p+1, n-1, v8_run);
case 'S': return v8_heap_stats(c->pst);
case 'T': return v8_snapshot(c->pst, p+1, n-1);
case 'W': return v8_warmup(c->pst, p+1, n-1);
Expand Down Expand Up @@ -1597,6 +1631,17 @@ static VALUE context_initialize(int argc, VALUE *argv, VALUE self)
barrier_wait(&c->early_init);
barrier_wait(&c->late_init);
}
// Deferred to first Context.new so Platform.set_flags! still has effect
// on the tag (which depends on V8 flags applied during v8_global_init).
{
static int version_tag_defined;
if (!version_tag_defined) {
VALUE m = rb_const_get(rb_cObject, rb_intern("MiniRacer"));
rb_define_const(m, "V8_CACHED_DATA_VERSION_TAG",
UINT2NUM(v8_cached_data_version_tag()));
version_tag_defined = 1;
}
}
return Qnil;
fail:
rb_raise(runtime_error, "Context.initialize: %s: %s", cause, strerror(r));
Expand Down Expand Up @@ -1730,11 +1775,175 @@ static VALUE script_error_cause(VALUE self)
return rb_iv_get(self, "@cause");
}

// Context#compile(source, filename: nil, cached_data: nil) -> MiniRacer::Script
//
// Validates the arguments, sends a (K)ompile request carrying
// [filename, source, cached_data-or-null] to the context, and wraps the
// reply in a freshly allocated Script.
//
// - source must be a String.
// - filename defaults to "<compile>" and must be a String.
// - cached_data, when given, must be a String tagged ASCII-8BIT; any other
//   encoding raises EncodingError.
//
// The reply is consumed from the end: first the error slot (handed to
// handle_exception), then an Array of [handle_id, cached_data, cache_rejected].
static VALUE context_compile(int argc, VALUE *argv, VALUE self)
{
    VALUE source, opts, filename, cached_data;
    VALUE reply, err, fields, script_obj;
    Script *script;
    Context *c;
    Ser req;

    TypedData_Get_Struct(self, Context, &context_type, c);
    rb_scan_args(argc, argv, "1:", &source, &opts);
    Check_Type(source, T_STRING);

    // Pull the two recognised keywords out of the options hash, if any.
    if (NIL_P(opts)) {
        filename = Qnil;
        cached_data = Qnil;
    } else {
        filename = rb_hash_aref(opts, ID2SYM(id_filename));
        cached_data = rb_hash_aref(opts, ID2SYM(id_cached_data));
    }
    if (NIL_P(filename))
        filename = rb_str_new_cstr("<compile>");
    Check_Type(filename, T_STRING);

    if (!NIL_P(cached_data)) {
        Check_Type(cached_data, T_STRING);
        // A cache file read in text mode carries a non-binary encoding;
        // reject it up front with a clear error rather than forwarding
        // possibly mangled bytes to V8.
        if (rb_enc_get(cached_data) != rb_ascii8bit_encoding()) {
            rb_raise(rb_eEncodingError,
                     "cached_data must be ASCII-8BIT (binary), got %s",
                     rb_enc_name(rb_enc_get(cached_data)));
        }
    }

    // Request payload: [filename, source, cached_data | null]
    ser_init1(&req, 'K');
    ser_array_begin(&req, 3);
    add_string(&req, filename);
    add_string(&req, source);
    if (!NIL_P(cached_data)) {
        ser_uint8array(&req, (const uint8_t *)RSTRING_PTR(cached_data),
                       RSTRING_LENINT(cached_data));
    } else {
        ser_null(&req);
    }
    ser_array_end(&req, 3);

    reply = rendezvous(c, &req.b);
    err = rb_ary_pop(reply);
    handle_exception(err);
    fields = rb_ary_pop(reply);
    Check_Type(fields, T_ARRAY);

    // rb_obj_alloc goes straight to the allocator, bypassing
    // Script#initialize (which always raises).
    script_obj = rb_obj_alloc(script_class);
    TypedData_Get_Struct(script_obj, Script, &script_type, script);
    script->context = self;
    script->handle_id = NUM2INT(rb_ary_entry(fields, 0));
    script->cached_data = rb_ary_entry(fields, 1);
    script->cache_rejected = RTEST(rb_ary_entry(fields, 2));
    return script_obj;
}

// Allocator for MiniRacer::Script: returns a wrapper around a blank Script
// (no context, no cached data, handle_id 0, not rejected, not disposed).
static VALUE script_alloc(VALUE klass)
{
    Script *script = ruby_xmalloc(sizeof(*script));

    // Members not named in the compound literal are zero-initialized.
    *script = (Script){ .context = Qnil, .cached_data = Qnil };
    return TypedData_Wrap_Struct(klass, &script_type, script);
}

static void script_free(void *arg)
{
// Intentionally does not send a dispose RPC — finalizers can't safely
// take rr_mtx. State::~State() walks st.scripts at isolate teardown so
// we leak nothing across a Context's lifetime; use Script#dispose to
// free eagerly mid-lifetime.
ruby_xfree(arg);
}

// dmark hook: keeps the cached-data String and the owning Context alive for
// as long as the Script wrapper itself is reachable.
static void script_mark(void *arg)
{
    const Script *script = arg;

    rb_gc_mark(script->cached_data);
    rb_gc_mark(script->context);
}

// dsize hook: reports the bytes attributed to a Script wrapper — the struct
// itself plus the byte length of the cached_data String when one is held.
//
// Uses RSTRING_LEN rather than RSTRING_LENINT: the latter narrows the length
// to int and raises RangeError when it does not fit, which a size-reporting
// callback should never do. The result is accumulated in size_t.
static size_t script_size(const void *arg)
{
    const Script *script = arg;
    size_t total = sizeof(*script);

    if (!NIL_P(script->cached_data))
        total += (size_t)RSTRING_LEN(script->cached_data);
    return total;
}

// Script#initialize: always raises. Script objects are produced only by
// Context#compile, which allocates them without calling initialize.
static VALUE script_initialize(int argc, VALUE *argv, VALUE self)
{
    // The arguments are irrelevant; silence unused-parameter warnings.
    (void)self;
    (void)argv;
    (void)argc;
    rb_raise(runtime_error, "MiniRacer::Script must be created via Context#compile");
    return Qnil; // not reached: rb_raise does not return
}

// Script#run: asks the owning context to execute the compiled script by
// sending an (R)un request that carries the script's handle id.
//
// Raises the runtime error for a disposed script and the context-disposed
// error when the owning Context has already quit. The reply is consumed
// from the end: the error slot goes to handle_exception, and the element
// before it is returned as the result.
static VALUE script_run(VALUE self)
{
    VALUE reply, err;
    Script *script;
    Context *c;
    Ser req;

    TypedData_Get_Struct(self, Script, &script_type, script);
    if (script->disposed) {
        rb_raise(runtime_error, "disposed script");
    }
    TypedData_Get_Struct(script->context, Context, &context_type, c);
    if (atomic_load(&c->quit)) {
        rb_raise(context_disposed_error, "disposed context");
    }

    ser_init1(&req, 'R');
    ser_int(&req, script->handle_id);
    reply = rendezvous(c, &req.b);
    err = rb_ary_pop(reply);
    handle_exception(err);
    return rb_ary_pop(reply);
}

// Script#cached_data: returns whatever Context#compile stored in the
// cached_data slot (Qnil when nothing was stored).
static VALUE script_cached_data(VALUE self)
{
    Script *script;

    TypedData_Get_Struct(self, Script, &script_type, script);
    return script->cached_data;
}

// Script#cache_rejected?: true when the compile reply flagged the supplied
// cache as rejected, false otherwise.
static VALUE script_cache_rejected_p(VALUE self)
{
    Script *script;

    TypedData_Get_Struct(self, Script, &script_type, script);
    if (script->cache_rejected)
        return Qtrue;
    return Qfalse;
}

// Script#dispose: marks the script disposed and, while the owning Context
// is still running, sends a (D)ispose request for its handle id.
// Calling it again on an already-disposed script is a no-op. Always
// returns nil.
static VALUE script_dispose(VALUE self)
{
    VALUE reply;
    Script *script;
    Context *c;
    Ser req;

    TypedData_Get_Struct(self, Script, &script_type, script);
    if (!script->disposed) {
        TypedData_Get_Struct(script->context, Context, &context_type, c);
        // Flag first, so the script counts as disposed even when the
        // request below is skipped or raises.
        script->disposed = 1;
        // If the Context has already quit there is nothing to send: the
        // handle was cleaned up by State::~State().
        if (!atomic_load(&c->quit)) {
            ser_init1(&req, 'D');
            ser_int(&req, script->handle_id);
            // NOTE(review): unlike compile/run, the reply is passed to
            // handle_exception as-is rather than popped from an array;
            // presumably the 'D' reply is a bare error-or-nil value.
            // Confirm against v8_dispose_script.
            reply = rendezvous(c, &req.b);
            handle_exception(reply);
        }
    }
    return Qnil;
}

// Script#disposed?: true once Script#dispose has been called on this object.
static VALUE script_disposed_p(VALUE self)
{
    Script *script;

    TypedData_Get_Struct(self, Script, &script_type, script);
    if (script->disposed)
        return Qtrue;
    return Qfalse;
}

__attribute__((visibility("default")))
void Init_mini_racer_extension(void)
{
VALUE c, m;

id_filename = rb_intern("filename");
id_cached_data = rb_intern("cached_data");

m = rb_define_module("MiniRacer");
c = rb_define_class_under(m, "Error", rb_eStandardError);
snapshot_error = rb_define_class_under(m, "SnapshotError", c);
Expand All @@ -1754,6 +1963,7 @@ void Init_mini_racer_extension(void)
c = context_class = rb_define_class_under(m, "Context", rb_cObject);
rb_define_method(c, "initialize", context_initialize, -1);
rb_define_method(c, "attach", context_attach, 2);
rb_define_method(c, "compile", context_compile, -1);
rb_define_method(c, "dispose", context_dispose, 0);
rb_define_method(c, "stop", context_stop, 0);
rb_define_method(c, "call", context_call, -1);
Expand All @@ -1764,6 +1974,15 @@ void Init_mini_racer_extension(void)
rb_define_method(c, "low_memory_notification", context_low_memory_notification, 0);
rb_define_alloc_func(c, context_alloc);

c = script_class = rb_define_class_under(m, "Script", rb_cObject);
rb_define_method(c, "initialize", script_initialize, -1);
rb_define_method(c, "run", script_run, 0);
rb_define_method(c, "cached_data", script_cached_data, 0);
rb_define_method(c, "cache_rejected?", script_cache_rejected_p, 0);
rb_define_method(c, "dispose", script_dispose, 0);
rb_define_method(c, "disposed?", script_disposed_p, 0);
rb_define_alloc_func(c, script_alloc);

c = snapshot_class = rb_define_class_under(m, "Snapshot", rb_cObject);
rb_define_method(c, "initialize", snapshot_initialize, -1);
rb_define_method(c, "warmup!", snapshot_warmup, 1);
Expand Down
Loading