Skip to content
Draft
4 changes: 4 additions & 0 deletions docs/guide/html-template.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,3 +17,7 @@ Emanote includes a default layout that includes a [[sidebar]], but can be custom
```query
children:.
```

## Diagnosing typos

Heist renders unknown splice tags and `${...}` references as literal text in the page, so a typo like `<ema:tite>` or `${value:sitURL}` used to fail silently. Emanote now scans every rendered route for those leftovers and logs a warning per typo per route — once per process, so a live-server reload does not redo the noise. Watch the `emanote run` console (or the `emanote gen` log) for a `Warn` line and grep your `.tpl` files for the reported tag.
1 change: 1 addition & 0 deletions emanote/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@

**Bug fixes**

- Heist templates: a typo in a splice tag (e.g. `<ema:tite>` instead of `<ema:title>`) or in an attribute reference (e.g. `${value:sitURL}`) used to render through silently, with the malformed tag or `${...}` token leaking into the page. Each rendered route is now scanned post-render and a per-route warning is logged for every surviving splice reference (closes [#81](https://github.com/srid/emanote/issues/81)).
- Timeline heatmap backlinks now use the daily note's route-derived date instead of parsing `YYYY-MM-DD` out of the rendered title, so custom-titled daily notes still appear in the heatmap.
- Cascade-declared `tags` no longer disappear from a child note that declares any of its own. The metadata-cascade merger previously inherited `aeson-extra`'s `lodashMerge`, which aligns arrays by index — `tags: [team-doc]` in `folder.yaml` plus `tags: [internal-note]` in `folder/note.md` produced `[internal-note]`, dropping the cascaded entry both from the per-page chip strip and from the `#352` global tag index. The merger now unions cascade arrays; see [yaml-config](docs/guide/yaml-config.md) for the full contract. `aeson-extra` is no longer a dependency (closes [#697](https://github.com/srid/emanote/issues/697)).
- Wiki link custom titles now render HTML entities like `&nbsp;` the same way regular Markdown link labels do. Previously `[[note|Spivak&nbsp;(2014)]]` rendered the entity text literally as `&nbsp;` (closes [#441](https://github.com/srid/emanote/issues/441)).
Expand Down
2 changes: 2 additions & 0 deletions emanote/emanote.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,7 @@ library
Emanote.View.Feed
Emanote.View.I18n
Emanote.View.JsBundle
Emanote.View.LintTemplate
Emanote.View.LiveServerFiles
Emanote.View.StaticUrl
Emanote.View.TagIndex
Expand Down Expand Up @@ -268,3 +269,4 @@ test-suite test
Emanote.Source.IgnoreSpec
Emanote.View.FeedSpec
Emanote.View.I18nSpec
Emanote.View.LintTemplateSpec
112 changes: 112 additions & 0 deletions emanote/src/Emanote/View/LintTemplate.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
{- | Detect Heist template splices that have no binding. Pure: this module
returns the warnings; deciding what to /do/ with them (log, dedupe across
re-renders, surface in a UI banner) is the caller's job.

Emanote uses interpreted Heist, where 'Heist.Interpreted.runNode' leaves an
element verbatim when its name has no binding, and 'getAttributeSplice'
falls back to the literal @${name}@ text when @name@ has no binding. Both
paths fail silently in the rendered HTML. We re-parse the rendered output
and report either signal as an 'UnboundSplice'. Closes
<https://github.com/srid/emanote/issues/81>.
-}
module Emanote.View.LintTemplate (
UnboundSplice (..),
scanRenderedHtml,
formatWarning,
) where

import Data.ByteString qualified as BS
import Data.ByteString.Char8 qualified as BSC
import Data.Text qualified as T
import Relude
import Text.XmlHtml qualified as X

-- | An unbound splice reference detected in rendered HTML output.
data UnboundSplice
= SpliceElement Text
| SpliceAttribute Text
deriving stock (Eq, Ord, Show)

-- | Render a warning as user-facing text.
formatWarning :: UnboundSplice -> Text
formatWarning = \case
SpliceElement nm -> "<" <> nm <> "/>"
SpliceAttribute nm -> "${" <> nm <> "}"

{- | Re-parse rendered HTML and collect every unbound splice reference. Returns
@Left@ on a parse failure so the caller can surface that as its own
diagnostic — silence on unparseable HTML would give a false-clean lint. An
XML document is treated as having no warnings (Emanote does not emit XML
through the Heist pipeline this lints).
-}
scanRenderedHtml :: FilePath -> ByteString -> Either Text [UnboundSplice]
scanRenderedHtml fp bs
-- Cheap byte-level pre-check: if the rendered output has neither a literal
-- @${@ token (an unbound attribute splice always survives as @${name}@) nor
-- a colon inside any tag opener (the only way a @<…:…>@ element can leak),
-- there is nothing to find. Lets the common case skip 'X.parseHTML' and
-- the AST walk entirely.
| not (hasSpliceMarker bs) = Right []
| otherwise = case X.parseHTML fp bs of
Left err -> Left (toText err)
Right (X.HtmlDocument _ _ nodes) -> Right (sortNub (foldMap nodeSplices nodes))
Right X.XmlDocument {} -> Right []

{- | Conservative byte scan: returns 'True' when @bs@ /possibly/ contains an
unbound splice survivor. False positives are fine — they trigger the full
parse, which then correctly reports zero warnings.
-}
hasSpliceMarker :: ByteString -> Bool
hasSpliceMarker bs = "${" `BS.isInfixOf` bs || hasColonTagName bs

hasColonTagName :: ByteString -> Bool
hasColonTagName = go
where
go bs = case BSC.elemIndex '<' bs of
Nothing -> False
Just i ->
let after = BS.drop (i + 1) bs
tagStart = case BSC.uncons after of
Just ('/', rest) -> rest
_ -> after
(name, _) = BSC.break isNameEnd tagStart
in BSC.elem ':' name
|| maybe False (\j -> go (BS.drop (j + 1) after)) (BSC.elemIndex '>' after)
isNameEnd c = c == ' ' || c == '\t' || c == '\n' || c == '/' || c == '>'

{- | Walk an 'X.Node' (and its descendants) into a list of splice references.
Duplicates are collapsed at the document root in 'scanRenderedHtml' rather
than per node — it costs less to fold once than to merge intermediate Sets.

The colon heuristic: any element name with a @:@ is a Heist splice. Plain
HTML5 tag names never contain a colon, and SVG/MathML inlined into HTML5
uses unprefixed forms (@<svg>@, @<math>@, @<mfrac>@). If a future
legitimate use of a colon-bearing tag arises, an allow-list belongs here.
-}
nodeSplices :: X.Node -> [UnboundSplice]
nodeSplices = \case
X.Element name attrs children ->
[SpliceElement name | T.any (== ':') name]
<> concatMap attrSplices attrs
<> foldMap nodeSplices children
_ -> []

attrSplices :: (Text, Text) -> [UnboundSplice]
attrSplices (_, value) = SpliceAttribute <$> attrSpliceRefs value

{- | Extract the names from any @${name}@ tokens in a string. A bare @${@
with no closing brace (or one that wraps a nested @${@, e.g.
@${incomplete ${valid}@) is treated as literal text — we step past it and
keep scanning, so a real splice that follows is still reported on its own.
-}
attrSpliceRefs :: Text -> [Text]
attrSpliceRefs t = case T.breakOn "${" t of
(_, "") -> []
(_, rest) ->
let body = T.drop 2 rest
in case T.breakOn "}" body of
(name, suffix)
| "}" `T.isPrefixOf` suffix
, not (T.any (== '$') name) ->
name : attrSpliceRefs (T.drop 1 suffix)
_ -> attrSpliceRefs body
40 changes: 38 additions & 2 deletions emanote/src/Emanote/View/Template.hs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
module Emanote.View.Template (emanoteSiteOutput, render) where

import Control.Monad.Logger (MonadLoggerIO)
import Control.Monad.Logger (MonadLogger, MonadLoggerIO)
import Data.Aeson.Types qualified as Aeson
import Data.Map.Strict qualified as Map
import Data.Map.Syntax ((##))
Expand All @@ -19,15 +19,18 @@ import Emanote.Model.Note qualified as MN
import Emanote.Model.SData qualified as SData
import Emanote.Model.Stork (renderStorkIndex)
import Emanote.Model.Toc (newToc, renderToc, tocUnnecessaryToRender)
import Emanote.Prelude (logW)
import Emanote.Route qualified as R
import Emanote.Route.SiteRoute (SiteRoute)
import Emanote.Route.SiteRoute qualified as SR
import Emanote.Route.SiteRoute.Class (indexRoute)
import Emanote.View.Common qualified as C
import Emanote.View.Export (renderExport)
import Emanote.View.Feed (feedDiscoveryLink, renderFeed)
import Emanote.View.LintTemplate (UnboundSplice, formatWarning, scanRenderedHtml)
import Emanote.View.TagIndex qualified as TagIndex
import Emanote.View.TaskIndex qualified as TaskIndex
import GHC.IO.Unsafe (unsafePerformIO)
import Heist qualified as H
import Heist.Extra.Splices.List qualified as Splices
import Heist.Extra.Splices.Pandoc qualified as Splices
Expand All @@ -45,7 +48,9 @@ import Text.Pandoc.Definition (Pandoc (..))
emanoteSiteOutput :: (MonadIO m, MonadLoggerIO m) => Prism' FilePath SiteRoute -> ModelEma -> SR.SiteRoute -> m (Ema.Asset LByteString)
emanoteSiteOutput rp model' r = do
let model = M.withRoutePrism rp model'
render model r <&> fmap fixStaticUrl
asset <- render model r <&> fmap fixStaticUrl
warnUnboundSplices (SR.siteRouteUrl model r) asset
pure asset
where
-- See the FIXME in more-head.tpl.
fixStaticUrl :: LByteString -> LByteString
Expand All @@ -67,6 +72,37 @@ emanoteSiteOutput rp model' r = do
guard $ not $ T.null prefix
pure prefix

{- | See 'Emanote.View.LintTemplate.scanRenderedHtml' for the scanning
rationale. This wrapper owns dedup (per @(route, splice)@) and log
delivery so the lint module stays a pure scanner.
-}
warnUnboundSplices :: (MonadIO m, MonadLogger m) => Text -> Ema.Asset LByteString -> m ()
warnUnboundSplices routeUrl = \case
Ema.AssetGenerated Ema.Html bytes ->
case scanRenderedHtml (toString routeUrl) (toStrict bytes) of
Left err -> warn $ "lint parse failed: " <> err
Right warnings -> do
fresh <- liftIO $ atomicModifyIORef' lintWarningCache $ \seen ->
let entries = Set.fromList ((routeUrl,) <$> warnings)
new = Set.difference entries seen
in (Set.union seen entries, snd <$> Set.toAscList new)
forM_ fresh $ warn . ("unbound splice " <>) . formatWarning
-- Static files and Atom/JSON assets bypass the Heist render path, so
-- there is no template-substitution surface to lint here.
_ -> pass
where
warn detail = logW $ "Template lint on '" <> routeUrl <> "': " <> detail

{- | Process-wide cache of @(route, splice)@ pairs already logged. Lives at the
rendering orchestration layer rather than inside 'Emanote.View.LintTemplate'
so the lint module stays a pure scanner. Bounded in practice by (number of
rendered routes) × (distinct splice typos in the user's templates), which is
small for any reasonable site — there is no eviction.
-}
{-# NOINLINE lintWarningCache #-}
lintWarningCache :: IORef (Set (Text, UnboundSplice))
lintWarningCache = unsafePerformIO (newIORef mempty)

render :: (MonadIO m, MonadLoggerIO m) => Model -> SR.SiteRoute -> m (Ema.Asset LByteString)
render m = \case
SR.SiteRoute_MissingR urlPath -> do
Expand Down
58 changes: 58 additions & 0 deletions emanote/test/Emanote/View/LintTemplateSpec.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
module Emanote.View.LintTemplateSpec where

import Emanote.View.LintTemplate (UnboundSplice (..), formatWarning, scanRenderedHtml)
import Relude
import Test.Hspec

spec :: Spec
spec = do
describe "scanRenderedHtml" $ do
it "ignores well-rendered HTML with no splice survivors" $ do
scanRenderedHtml "ok.html" "<html><body><p>hello</p></body></html>"
`shouldBe` Right []

it "reports an element whose tag name still has a Heist-style colon" $ do
scanRenderedHtml "ok.html" "<html><body><ema:tite></ema:tite></body></html>"
`shouldBe` Right [SpliceElement "ema:tite"]

it "reports nested unbound element splices" $ do
scanRenderedHtml "ok.html" "<html><body><div><ema:foo>x</ema:foo></div></body></html>"
`shouldBe` Right [SpliceElement "ema:foo"]

it "reports a literal ${name} that survived as an attribute value" $ do
scanRenderedHtml "ok.html" "<html><body><a href=\"${value:siteUrl}\">x</a></body></html>"
`shouldBe` Right [SpliceAttribute "value:siteUrl"]

it "reports multiple ${...} references in one attribute" $ do
scanRenderedHtml "ok.html" "<html><body><img alt=\"${a} - ${b}\"/></body></html>"
`shouldBe` Right [SpliceAttribute "a", SpliceAttribute "b"]

it "deduplicates repeated occurrences across the document" $ do
scanRenderedHtml "ok.html" "<html><body><ema:foo></ema:foo><ema:foo></ema:foo></body></html>"
`shouldBe` Right [SpliceElement "ema:foo"]

it "leaves text nodes alone (e.g. ${...} appearing inside a <pre>)" $ do
scanRenderedHtml "ok.html" "<html><body><pre>${not-an-attr}</pre></body></html>"
`shouldBe` Right []

it "still reports a valid ${...} that follows an unbalanced ${" $ do
scanRenderedHtml "ok.html" "<html><body><a title=\"${incomplete ${valid}\">x</a></body></html>"
`shouldBe` Right [SpliceAttribute "valid"]

it "ignores a trailing ${ with no closing brace" $ do
scanRenderedHtml "ok.html" "<html><body><a title=\"${valid} ${trailing\">x</a></body></html>"
`shouldBe` Right [SpliceAttribute "valid"]

it "skips the parse entirely when the bytes have no splice marker" $ do
-- The pre-check short-circuits to Right [] without invoking parseHTML,
-- so even malformed HTML returns clean — there is nothing for the lint
-- to find when the bytes contain neither '${' nor a colon-tag.
scanRenderedHtml "bad.html" "<html><body><div></span></body></html>"
`shouldBe` Right []

describe "formatWarning" $ do
it "formats element warnings as a tag" $ do
formatWarning (SpliceElement "ema:tite") `shouldBe` "<ema:tite/>"

it "formats attribute warnings as a dollar-brace token" $ do
formatWarning (SpliceAttribute "value:siteUrl") `shouldBe` "${value:siteUrl}"
Loading