srid · srid · May 17, 2026 · May 17, 2026 · May 17, 2026 · May 17, 2026
diff --git a/docs/dev/ralph/memory-66/README.md b/docs/dev/ralph/memory-66/README.md
diff --git a/docs/dev/ralph/memory-66/gen_corpus.py b/docs/dev/ralph/memory-66/gen_corpus.py
@@ -0,0 +1,161 @@
+#!/usr/bin/env python3
+"""Generate a synthetic emanote notebook of ~4500 markdown files, ~70MB total.
+
+Usage: gen_corpus.py [OUT_DIR [NUM_FILES [TARGET_BYTES]]]
+
+Each file has:
+- A title heading
+- A YAML frontmatter sometimes
+- Several paragraphs of lorem-like text with wikilinks
+- A few headings
+- Occasional lists, Haskell code blocks and note embeds
+
+Wikilinks form a random graph so the link index actually has work to do.
+The RNG seed is fixed, so identical arguments reproduce the same corpus.
+"""
+import os
+import random
+import sys
+
+random.seed(42)
+
+OUT = sys.argv[1] if len(sys.argv) > 1 else "/home/toor/corpus"
+N = int(sys.argv[2]) if len(sys.argv) > 2 else 4500
+TARGET_BYTES = int(sys.argv[3]) if len(sys.argv) > 3 else 70 * 1024 * 1024
+# Guarded so that N <= 0 (index.md only) does not die with ZeroDivisionError.
+AVG_BYTES = TARGET_BYTES // N if N > 0 else 0
+
+WORDS = ("the quick brown fox jumps over the lazy dog functor monad applicative haskell "
+         "pandoc emanote ema lvar parser source eval render template heist note ix set "
+         "memory leak profile retainer cost centre static unboxed strict thunk graph link "
+         "wikilink folgezettel sequel zettel obsidian roam neuron foam dendron logseq "
+         "atomic note structure architecture optimisation cycle measurement baseline "
+         "decision dependency volatility encapsulation closure capture share unsharing").split()
+
+TAGS_POOL = ["haskell", "design", "perf", "note", "todo", "idea", "ref", "math", "wip", "draft",
+             "review", "meta", "tool", "lit", "code", "infra", "ux", "ops", "test", "spec"]
+
+def folder_for(i):
+    """Return the relative folder of note ``i``: one of 32 top-level folders,
+    with every 7th note nested one level deeper in a ``subK`` folder."""
+    top = f"topic{i % 32:02d}"
+    if i % 7 == 0:
+        return os.path.join(top, f"sub{(i // 32) % 11}")
+    return top
+
+def slug(i):
+    """Return the file stem / wikilink target of note ``i``, e.g. ``n00042``."""
+    return f"n{i:05d}"
+
+def title(i):
+    """Return a random 2-5 word title. ``i`` is accepted but not used."""
+    return " ".join(random.sample(WORDS, k=random.randint(2, 5))).title()
+
+def paragraph(words=120):
+    """Return a capitalised, full-stop-terminated run of random words.
+
+    Despite its name, ``words`` is a character budget: words are appended
+    until their combined length, counting one separator each, reaches it.
+    """
+    out = []
+    length = 0  # sum of len(word) + 1 over ``out``, maintained incrementally
+    while length < words:
+        word = random.choice(WORDS)
+        out.append(word)
+        length += len(word) + 1
+    s = " ".join(out)
+    # s[:1] rather than s[0]: a non-positive budget yields "." instead of raising.
+    return s[:1].upper() + s[1:] + "."
+
+def wikilink(target_i, alias=None):
+    """Return ``[[slug]]``, or ``[[slug|alias]]`` when ``alias`` is non-empty."""
+    t = slug(target_i)
+    if alias:
+        return f"[[{t}|{alias}]]"
+    return f"[[{t}]]"
+
+def write_file(i, n, path):
+    """Write synthetic note ``i`` to ``path``, creating parent folders.
+
+    ``n`` is the corpus size; wikilink and embed targets are drawn uniformly
+    from ``range(n)``. Notes with ``i % 3 != 0`` get YAML frontmatter.
+    """
+    lines = []
+    size = 0  # running sum(len(x) for x in lines); newlines are not counted
+
+    def emit(*new_lines):
+        nonlocal size
+        lines.extend(new_lines)
+        size += sum(len(x) for x in new_lines)
+
+    if i % 3 != 0:
+        tags = random.sample(TAGS_POOL, k=random.randint(0, 4))
+        emit("---", f"title: {title(i)}")
+        if tags:
+            emit("tags:", *(f"  - {t}" for t in tags))
+        if i % 13 == 0:
+            emit(f"order: {i % 50}")
+        emit("---", "")
+    emit(f"# {title(i)}", "")
+    # body — keep generating blocks until size ~ AVG_BYTES (at least 2000)
+    target = max(2000, int(random.gauss(AVG_BYTES, AVG_BYTES / 4)))
+    while size < target:
+        kind = random.random()
+        if kind < 0.55:
+            tokens = paragraph(random.randint(40, 180)).split()
+            # Aliases come from this pristine copy: if a position is picked
+            # twice, reading tokens[j] would nest one wikilink inside another.
+            plain = list(tokens)
+            for _ in range(random.randint(1, 4)):
+                j = random.randrange(len(tokens))
+                target_i = random.randrange(n)
+                alias = plain[j] if random.random() < 0.5 else None
+                tokens[j] = wikilink(target_i, alias)
+            emit(" ".join(tokens), "")
+        elif kind < 0.7:
+            emit(f"## {title(i)}", "")
+        elif kind < 0.82:
+            # list
+            for _ in range(random.randint(3, 8)):
+                emit(f"- {paragraph(random.randint(8, 25))}")
+            emit("")
+        elif kind < 0.92:
+            # code block
+            emit(
+                "```haskell",
+                f"foo{i} :: Int -> Int",
+                f"foo{i} x = x + {i}",
+                "```",
+                "",
+            )
+        else:
+            # embedded note (becomes processed)
+            emit(f"![[{slug(random.randrange(n))}]]", "")
+    parent = os.path.dirname(path)
+    if parent:  # os.makedirs("") raises for a bare file name
+        os.makedirs(parent, exist_ok=True)
+    # The content is pure ASCII; pin the encoding so the locale cannot matter.
+    with open(path, "w", encoding="utf-8") as f:
+        f.write("\n".join(lines))
+
+def main():
+    """Generate the corpus under OUT and print a file-count / size summary."""
+    os.makedirs(OUT, exist_ok=True)
+    # an index.md at root
+    with open(os.path.join(OUT, "index.md"), "w", encoding="utf-8") as f:
+        f.write("# Synthetic Corpus\n\nGenerated by gen_corpus.py for emanote #66 reproduction.\n")
+    for i in range(N):
+        rel = os.path.join(folder_for(i), slug(i) + ".md")
+        write_file(i, N, os.path.join(OUT, rel))
+    # report: every .md under OUT, including index.md and any pre-existing files
+    total = 0
+    count = 0
+    for root, _, files in os.walk(OUT):
+        for f in files:
+            if f.endswith(".md"):
+                total += os.path.getsize(os.path.join(root, f))
+                count += 1
+    print(f"Wrote {count} files, {total/1024/1024:.1f} MB total", flush=True)
+
+if __name__ == "__main__":
+    main()
diff --git a/docs/dev/ralph/memory-66/measure.sh b/docs/dev/ralph/memory-66/measure.sh
@@ -0,0 +1,56 @@
+#!/usr/bin/env bash
+# Start "emanote run" on a corpus, wait until its HTTP server answers, then
+# print the time-to-ready, the resident set size at that moment and the log tail.
+#
+# Usage: measure.sh CORPUS [RTS_FLAGS]
+#   CORPUS     notebook directory; may be relative
+#   RTS_FLAGS  optional flags passed as "+RTS <flags> -RTS" (split on whitespace)
+# Environment overrides: EMANOTE, emanote_datadir, PORT, TIMEOUT (seconds).
+set -eo pipefail
+EMANOTE=${EMANOTE:-/home/toor/code/emanote/dist-newstyle/build/x86_64-linux/ghc-9.8.4/emanote-2.0.0.0/x/emanote/build/emanote/emanote}
+export emanote_datadir=${emanote_datadir:-/home/toor/code/emanote/emanote/default}
+CORPUS=${1:?corpus path}
+RTS=${2:-}
+PORT=${PORT:-$(( RANDOM % 10000 + 9000 ))}
+TIMEOUT=${TIMEOUT:-600}
+# Resolve to an absolute path: after the cd below, a relative CORPUS handed to
+# -L would be looked up inside the corpus itself.
+CORPUS=$(CDPATH= cd -- "$CORPUS" && pwd)
+LOG=$(mktemp)
+PID=
+# Runs on every exit path so no server process or temp log is left behind.
+cleanup() {
+  if [ -n "$PID" ]; then
+    kill "$PID" 2>/dev/null || true
+    wait "$PID" 2>/dev/null || true
+  fi
+  rm -f "$LOG"
+}
+trap cleanup EXIT
+cd "$CORPUS"
+RTS_ARGS=()
+# $RTS is deliberately unquoted: it may carry several flags.
+if [ -n "$RTS" ]; then RTS_ARGS=(+RTS $RTS -RTS); fi
+# $EMANOTE stays unquoted, as before, so it can hold a command plus arguments.
+$EMANOTE -L "$CORPUS" run --port "$PORT" "${RTS_ARGS[@]}" > "$LOG" 2>&1 &
+PID=$!
+READY=
+START=$SECONDS
+# Time by the wall clock, not by iteration count: one iteration lasts up to
+# two seconds when curl runs into its own --max-time.
+while (( SECONDS - START < TIMEOUT )); do
+  if ! kill -0 "$PID" 2>/dev/null; then echo "emanote died" >&2; tail -40 "$LOG" >&2; exit 1; fi
+  if curl -s -o /dev/null --max-time 1 "http://localhost:$PORT/"; then READY=$(( SECONDS - START )); break; fi
+  sleep 1
+done
+if [ -z "$READY" ]; then echo "timeout" >&2; tail -40 "$LOG" >&2; exit 1; fi
+LOAD_RSS=$(awk '/VmRSS/{print $2}' "/proc/$PID/status")
+echo "READY_AFTER_S=$READY"
+echo "LOAD_RSS_MB=$(awk -v r="$LOAD_RSS" 'BEGIN{printf "%.0f", r/1024}')"
+kill -INT "$PID" 2>/dev/null || true
+sleep 2
+kill "$PID" 2>/dev/null || true
+wait "$PID" 2>/dev/null || true
+PID=
+echo "---LOG TAIL---"
+tail -60 "$LOG"
diff --git a/emanote/emanote.cabal b/emanote/emanote.cabal
@@ -113,6 +113,7 @@ common library-common
     , commonmark-wikilink    >=0.2
     , containers
     , data-default
+    , deepseq
     , deriving-aeson
     , directory
     , ema                    >=0.10.1
@@ -241,7 +242,11 @@ executable emanote
   import:         library-common
   hs-source-dirs: exe
   main-is:        Main.hs
-  ghc-options:    -threaded -rtsopts -with-rtsopts=-N
+  -- -F1.5: shrink the old-generation retention factor from the GHC default
+  -- (2.0) to 1.5, trading a few extra major GCs for ~30% lower RSS on large
+  -- notebooks (see docs/dev/ralph/memory-66/README.md, cycle 2). Users can
+  -- still override at runtime, e.g. `emanote run +RTS -F2 -RTS`.
+  ghc-options:    -threaded -rtsopts "-with-rtsopts=-N -F1.5"
 
   if flag(ghcid)
     hs-source-dirs: src

diff --git a/emanote/src/Emanote/Model/Graph.hs b/emanote/src/Emanote/Model/Graph.hs
@@ -3,7 +3,6 @@ module Emanote.Model.Graph where
 import Commonmark.Extensions.WikiLink qualified as WL
 import Data.IxSet.Typed ((@+), (@=))
 import Data.IxSet.Typed qualified as Ix
-import Data.Map.Strict qualified as Map
 import Data.Set qualified as Set
 import Data.Tree (Forest, Tree (Node))
 import Emanote.Model.Calendar qualified as Calendar
@@ -12,7 +11,7 @@ import Emanote.Model.Link.Resolve qualified as Resolve
 import Emanote.Model.Meta (lookupRouteMeta)
 import Emanote.Model.Note qualified as MN
 import Emanote.Model.Note qualified as N
-import Emanote.Model.Type (Model, modelIndexRoute, modelNotes, modelRels, parentLmlRoute)
+import Emanote.Model.Type (Model, modelIndexRoute, modelLookupNoteByRoute', modelNotes, modelRels, parentLmlRoute)
 import Emanote.Route qualified as R
 import Emanote.Route.SiteRoute qualified as SR
 import Optics.Operators as Lens ((^.))
@@ -176,20 +175,24 @@ lookupNoteByWikiLink model currentRoute wl = do
 modelLookupBacklinks :: R.LMLRoute -> Model -> [(R.LMLRoute, NonEmpty [B.Block])]
 modelLookupBacklinks r model =
   sortOn (Calendar.backlinkSortKey model . fst)
-    $ groupNE
+    $ mapMaybe withCtx
+    $ groupBySource
     $ backlinkRels r model
-    <&> \rel ->
-      (rel ^. Rel.relFrom, rel ^. Rel.relCtx)
   where
-    groupNE :: forall a b. (Ord a) => [(a, b)] -> [(a, NonEmpty b)]
-    groupNE =
-      Map.toList . foldl' f Map.empty
-      where
-        f :: Map a (NonEmpty b) -> (a, b) -> Map a (NonEmpty b)
-        f m (x, y) =
-          case Map.lookup x m of
-            Nothing -> Map.insert x (one y) m
-            Just ys -> Map.insert x (ys <> one y) m
+    -- Distinct source routes of the backlink rels (deduplicated, not
+    -- grouped). Context blocks are no longer carried on each Rel (#66);
+    -- 'withCtx' recovers them once per source note by re-walking that
+    -- note's Pandoc instead of retaining them in _modelRels. A source
+    -- whose lookup or re-walk yields nothing is dropped by 'mapMaybe'.
+    groupBySource :: [Rel.Rel] -> [R.LMLRoute]
+    groupBySource = ordNub . fmap (^. Rel.relFrom)
+    targetMR :: R.ModelRoute
+    targetMR = R.ModelRoute_LML R.LMLView_Html r
+    withCtx :: R.LMLRoute -> Maybe (R.LMLRoute, NonEmpty [B.Block])
+    withCtx from = do
+      sourceNote <- modelLookupNoteByRoute' from model
+      ctxs <- nonEmpty $ Rel.noteRelCtxToTarget targetMR sourceNote
+      pure (from, ctxs)
 
 -- | Rels pointing *to* this route
 backlinkRels :: R.LMLRoute -> Model -> [Rel.Rel]

diff --git a/emanote/src/Emanote/Model/Link/Rel.hs b/emanote/src/Emanote/Model/Link/Rel.hs
@@ -98,7 +98,33 @@ noteRels note =
             pure (target, ctx)
        in Ix.fromList $ zipWith mkRel [0 ..] links
       where
-        mkRel srcPos (target, ctx) = Rel (note ^. noteRoute) target srcPos ctx
+        -- Drop the per-Rel `[B.Block]` context at insert time and recover
+        -- it on demand at backlink-render time by re-walking the source
+        -- note's Pandoc (see 'noteRelCtxToTarget' / 'modelLookupBacklinks'
+        -- in @Emanote.Model.Graph@). The context is a chunk of Pandoc
+        -- Blocks per outgoing link; with thousands of notes and dozens of
+        -- outgoing links each, persisting it in @_modelRels@ dominates the
+        -- live-data overhead (#66). The on-demand walk is bounded by the
+        -- source note's own AST size — fast for any single backlinks page.
+        mkRel srcPos (target, _ctx) = Rel (note ^. noteRoute) target srcPos []
+
+{- | Re-extract the Pandoc-block contexts of every outgoing link in
+@sourceNote@ that points to @targetMR@. Used by the backlinks renderer
+to recover the context that 'noteRels' deliberately drops at insert
+time (#66). Each call walks the source note's Pandoc once. Contexts are
+grouped by link URL in 'Map.toList' key order, not in document order.
+-}
+noteRelCtxToTarget :: ModelRoute -> Note -> [[B.Block]]
+noteRelCtxToTarget targetMR sourceNote =
+  let contextsByUrl = LC.queryLinksWithContext (sourceNote ^. noteDoc)
+      parentR = noteResolveLinkBase sourceNote
+      targets = unresolvedRelsTo targetMR
+   in do
+        (url, instances) <- Map.toList contextsByUrl
+        (attrs, ctx) <- reverse (toList instances)
+        target <- maybeToList $ fst <$> parseUnresolvedRelTarget parentR attrs url
+        guard $ target `elem` targets
+        pure ctx
 
 {- | All `UnresolvedRelTarget`s that could resolve to the given
 `ModelRoute`. Each `URTResource` form is built by re-parsing a URL

diff --git a/emanote/src/Emanote/Source/Patch.hs b/emanote/src/Emanote/Source/Patch.hs
@@ -5,7 +5,9 @@ module Emanote.Source.Patch (
   ignorePatterns,
 ) where
 
+import Control.DeepSeq (deepseq)
 import Control.Monad.Logger (LoggingT (runLoggingT), MonadLogger, MonadLoggerIO (askLoggerIO))
+import Data.Aeson qualified as Aeson
 import Data.ByteString qualified as BS
 import Data.List qualified as List
 import Data.List.NonEmpty qualified as NEL
@@ -255,6 +257,9 @@ parseAndInsert noteF model refreshAction r src = do
   s <- readRefreshedFile refreshAction (locResolve src)
   note <-
     N.parseNote (model ^. M.modelScriptingEngine) (M.modelPluginBaseDir model) r src (decodeUtf8 s)
+  -- Force the parsed Pandoc and Aeson Value so per-file parser closures
+  -- can be released as we stream files into the model (#66).
+  note ^. N.noteDoc `deepseq` (note ^. N.noteMeta :: Aeson.Value) `deepseq` pure ()
   pure
     $ M.modelInsertNote (noteF note)
     >>> (modelSourceDependencies %~ SDeps.setLuaDeps r src (note ^. N.notePandocFilterDeclarations))

diff --git a/emanote/test/Emanote/Model/Link/RelSpec.hs b/emanote/test/Emanote/Model/Link/RelSpec.hs
@@ -1,6 +1,5 @@
 module Emanote.Model.Link.RelSpec where
 
-import Commonmark.Extensions.WikiLink qualified as WL
 import Data.IxSet.Typed qualified as Ix
 import Emanote.Model.Link.Rel
 import Emanote.Model.Note qualified as MN
@@ -100,21 +99,20 @@ spec = do
       got === want
   describe "noteRels source order (issue #186)" $ do
     it "orders rels by source position, not by lexicographic Ord on context" $ do
-      -- 'Z' sorts last lexicographically but comes first in source; 'A'
-      -- sorts first but comes second. Without the srcPos tie-breaker,
-      -- Ord [Block] would yield A-then-Z; we want source order.
+      -- Both 'z' and 'a' link to the same target via the same URL, so
+      -- the two rels share (_relFrom, _relTo) and can only be ordered
+      -- by _relSrcPos. Source order is "Z first" then "A second", so
+      -- IxSet.toList should produce srcPos [0, 1] in that order.
+      -- (#66 dropped _relCtx — see Rel.noteRelCtxToTarget for the
+      -- on-demand backlinks-context recovery path.)
       let mkLink lbl = B.Link B.nullAttr [B.Str lbl] ("Foo.md", "")
           note =
             MN.mkEmptyNoteWith
               barRoute
               [ B.Para [B.Str "Z first: ", mkLink "z"]
               , B.Para [B.Str "A second: ", mkLink "a"]
               ]
-          paraText rel = case _relCtx rel of
-            [B.Para is] -> WL.plainify is
-            other -> error $ "expected single-paragraph context, got " <> show other
-      (paraText <$> Ix.toList (noteRels note))
-        `shouldBe` ["Z first: z", "A second: a"]
+      (_relSrcPos <$> Ix.toList (noteRels note)) `shouldBe` [0, 1]
     it "does not collapse two identical-context links to the same target" $ do
       -- One paragraph mentions Foo.md twice. The two rels share
       -- (relFrom, relTo, relCtx); without srcPos in Ord, IxSet.fromList's