diff --git a/docs/guide/html-template.md b/docs/guide/html-template.md index 117660a99..2e8ac721c 100644 --- a/docs/guide/html-template.md +++ b/docs/guide/html-template.md @@ -17,3 +17,7 @@ Emanote includes a default layout that includes a [[sidebar]], but can be custom ```query children:. ``` + +## Diagnosing typos + +Heist renders unknown splice tags and `${...}` references as literal text in the page, so a typo like `` or `${value:sitURL}` used to fail silently. Emanote now scans every rendered route for those leftovers and logs a warning per typo per route — once per process, so a live-server reload does not redo the noise. Watch the `emanote run` console (or the `emanote gen` log) for a `Warn` line and grep your `.tpl` files for the reported tag. diff --git a/emanote/CHANGELOG.md b/emanote/CHANGELOG.md index 184f29a0b..2c66a26cf 100644 --- a/emanote/CHANGELOG.md +++ b/emanote/CHANGELOG.md @@ -25,6 +25,7 @@ **Bug fixes** +- Heist templates: a typo in a splice tag (e.g. `` instead of ``) or in an attribute reference (e.g. `${value:sitURL}`) used to render through silently, with the malformed tag or `${...}` token leaking into the page. Each rendered route is now scanned post-render and a per-route warning is logged for every surviving splice reference (closes [#81](https://github.com/srid/emanote/issues/81)). - Timeline heatmap backlinks now use the daily note's route-derived date instead of parsing `YYYY-MM-DD` out of the rendered title, so custom-titled daily notes still appear in the heatmap. - Cascade-declared `tags` no longer disappear from a child note that declares any of its own. The metadata-cascade merger previously inherited `aeson-extra`'s `lodashMerge`, which aligns arrays by index — `tags: [team-doc]` in `folder.yaml` plus `tags: [internal-note]` in `folder/note.md` produced `[internal-note]`, dropping the cascaded entry both from the per-page chip strip and from the `#352` global tag index. The merger now unions cascade arrays; see [yaml-config](docs/guide/yaml-config.md) for the full contract. `aeson-extra` is no longer a dependency (closes [#697](https://github.com/srid/emanote/issues/697)). - Wiki link custom titles now render HTML entities like ` ` the same way regular Markdown link labels do. Previously `[[note|Spivak (2014)]]` rendered the entity text literally as ` ` (closes [#441](https://github.com/srid/emanote/issues/441)). diff --git a/emanote/emanote.cabal b/emanote/emanote.cabal index f039d875c..8b1bea6fb 100644 --- a/emanote/emanote.cabal +++ b/emanote/emanote.cabal @@ -218,6 +218,7 @@ library Emanote.View.Feed Emanote.View.I18n Emanote.View.JsBundle + Emanote.View.LintTemplate Emanote.View.LiveServerFiles Emanote.View.StaticUrl Emanote.View.TagIndex @@ -268,3 +269,4 @@ test-suite test Emanote.Source.IgnoreSpec Emanote.View.FeedSpec Emanote.View.I18nSpec + Emanote.View.LintTemplateSpec diff --git a/emanote/src/Emanote/View/LintTemplate.hs b/emanote/src/Emanote/View/LintTemplate.hs new file mode 100644 index 000000000..90b621f14 --- /dev/null +++ b/emanote/src/Emanote/View/LintTemplate.hs @@ -0,0 +1,112 @@ +{- | Detect Heist template splices that have no binding. Pure: this module +returns the warnings; deciding what to /do/ with them (log, dedupe across +re-renders, surface in a UI banner) is the caller's job. + +Emanote uses interpreted Heist, where 'Heist.Interpreted.runNode' leaves an +element verbatim when its name has no binding, and 'getAttributeSplice' +falls back to the literal @${name}@ text when @name@ has no binding. Both +paths fail silently in the rendered HTML. We re-parse the rendered output +and report either signal as an 'UnboundSplice'. Closes +. +-} +module Emanote.View.LintTemplate ( + UnboundSplice (..), + scanRenderedHtml, + formatWarning, +) where + +import Data.ByteString qualified as BS +import Data.ByteString.Char8 qualified as BSC +import Data.Text qualified as T +import Relude +import Text.XmlHtml qualified as X + +-- | An unbound splice reference detected in rendered HTML output. +data UnboundSplice + = SpliceElement Text + | SpliceAttribute Text + deriving stock (Eq, Ord, Show) + +-- | Render a warning as user-facing text. +formatWarning :: UnboundSplice -> Text +formatWarning = \case + SpliceElement nm -> "<" <> nm <> "/>" + SpliceAttribute nm -> "${" <> nm <> "}" + +{- | Re-parse rendered HTML and collect every unbound splice reference. Returns +@Left@ on a parse failure so the caller can surface that as its own +diagnostic — silence on unparseable HTML would give a false-clean lint. An +XML document is treated as having no warnings (Emanote does not emit XML +through the Heist pipeline this lints). +-} +scanRenderedHtml :: FilePath -> ByteString -> Either Text [UnboundSplice] +scanRenderedHtml fp bs + -- Cheap byte-level pre-check: if the rendered output has neither a literal + -- @${@ token (an unbound attribute splice always survives as @${name}@) nor + -- a colon inside any tag opener (the only way a @<…:…>@ element can leak), + -- there is nothing to find. Lets the common case skip 'X.parseHTML' and + -- the AST walk entirely. + | not (hasSpliceMarker bs) = Right [] + | otherwise = case X.parseHTML fp bs of + Left err -> Left (toText err) + Right (X.HtmlDocument _ _ nodes) -> Right (sortNub (foldMap nodeSplices nodes)) + Right X.XmlDocument {} -> Right [] + +{- | Conservative byte scan: returns 'True' when @bs@ /possibly/ contains an +unbound splice survivor. False positives are fine — they trigger the full +parse, which then correctly reports zero warnings. +-} +hasSpliceMarker :: ByteString -> Bool +hasSpliceMarker bs = "${" `BS.isInfixOf` bs || hasColonTagName bs + +hasColonTagName :: ByteString -> Bool +hasColonTagName = go + where + go bs = case BSC.elemIndex '<' bs of + Nothing -> False + Just i -> + let after = BS.drop (i + 1) bs + tagStart = case BSC.uncons after of + Just ('/', rest) -> rest + _ -> after + (name, _) = BSC.break isNameEnd tagStart + in BSC.elem ':' name + || maybe False (\j -> go (BS.drop (j + 1) after)) (BSC.elemIndex '>' after) + isNameEnd c = c == ' ' || c == '\t' || c == '\n' || c == '/' || c == '>' + +{- | Walk an 'X.Node' (and its descendants) into a list of splice references. +Duplicates are collapsed at the document root in 'scanRenderedHtml' rather +than per node — it costs less to fold once than to merge intermediate Sets. + +The colon heuristic: any element name with a @:@ is a Heist splice. Plain +HTML5 tag names never contain a colon, and SVG/MathML inlined into HTML5 +uses unprefixed forms (@@, @@, @@). If a future +legitimate use of a colon-bearing tag arises, an allow-list belongs here. +-} +nodeSplices :: X.Node -> [UnboundSplice] +nodeSplices = \case + X.Element name attrs children -> + [SpliceElement name | T.any (== ':') name] + <> concatMap attrSplices attrs + <> foldMap nodeSplices children + _ -> [] + +attrSplices :: (Text, Text) -> [UnboundSplice] +attrSplices (_, value) = SpliceAttribute <$> attrSpliceRefs value + +{- | Extract the names from any @${name}@ tokens in a string. A bare @${@ +with no closing brace (or one that wraps a nested @${@, e.g. +@${incomplete ${valid}@) is treated as literal text — we step past it and +keep scanning, so a real splice that follows is still reported on its own. +-} +attrSpliceRefs :: Text -> [Text] +attrSpliceRefs t = case T.breakOn "${" t of + (_, "") -> [] + (_, rest) -> + let body = T.drop 2 rest + in case T.breakOn "}" body of + (name, suffix) + | "}" `T.isPrefixOf` suffix + , not (T.any (== '$') name) -> + name : attrSpliceRefs (T.drop 1 suffix) + _ -> attrSpliceRefs body diff --git a/emanote/src/Emanote/View/Template.hs b/emanote/src/Emanote/View/Template.hs index 4d126f989..3dca96b65 100644 --- a/emanote/src/Emanote/View/Template.hs +++ b/emanote/src/Emanote/View/Template.hs @@ -1,6 +1,6 @@ module Emanote.View.Template (emanoteSiteOutput, render) where -import Control.Monad.Logger (MonadLoggerIO) +import Control.Monad.Logger (MonadLogger, MonadLoggerIO) import Data.Aeson.Types qualified as Aeson import Data.Map.Strict qualified as Map import Data.Map.Syntax ((##)) @@ -19,6 +19,7 @@ import Emanote.Model.Note qualified as MN import Emanote.Model.SData qualified as SData import Emanote.Model.Stork (renderStorkIndex) import Emanote.Model.Toc (newToc, renderToc, tocUnnecessaryToRender) +import Emanote.Prelude (logW) import Emanote.Route qualified as R import Emanote.Route.SiteRoute (SiteRoute) import Emanote.Route.SiteRoute qualified as SR @@ -26,8 +27,10 @@ import Emanote.Route.SiteRoute.Class (indexRoute) import Emanote.View.Common qualified as C import Emanote.View.Export (renderExport) import Emanote.View.Feed (feedDiscoveryLink, renderFeed) +import Emanote.View.LintTemplate (UnboundSplice, formatWarning, scanRenderedHtml) import Emanote.View.TagIndex qualified as TagIndex import Emanote.View.TaskIndex qualified as TaskIndex +import GHC.IO.Unsafe (unsafePerformIO) import Heist qualified as H import Heist.Extra.Splices.List qualified as Splices import Heist.Extra.Splices.Pandoc qualified as Splices @@ -45,7 +48,9 @@ import Text.Pandoc.Definition (Pandoc (..)) emanoteSiteOutput :: (MonadIO m, MonadLoggerIO m) => Prism' FilePath SiteRoute -> ModelEma -> SR.SiteRoute -> m (Ema.Asset LByteString) emanoteSiteOutput rp model' r = do let model = M.withRoutePrism rp model' - render model r <&> fmap fixStaticUrl + asset <- render model r <&> fmap fixStaticUrl + warnUnboundSplices (SR.siteRouteUrl model r) asset + pure asset where -- See the FIXME in more-head.tpl. fixStaticUrl :: LByteString -> LByteString @@ -67,6 +72,37 @@ emanoteSiteOutput rp model' r = do guard $ not $ T.null prefix pure prefix +{- | See 'Emanote.View.LintTemplate.scanRenderedHtml' for the scanning +rationale. This wrapper owns dedup (per @(route, splice)@) and log +delivery so the lint module stays a pure scanner. +-} +warnUnboundSplices :: (MonadIO m, MonadLogger m) => Text -> Ema.Asset LByteString -> m () +warnUnboundSplices routeUrl = \case + Ema.AssetGenerated Ema.Html bytes -> + case scanRenderedHtml (toString routeUrl) (toStrict bytes) of + Left err -> warn $ "lint parse failed: " <> err + Right warnings -> do + fresh <- liftIO $ atomicModifyIORef' lintWarningCache $ \seen -> + let entries = Set.fromList ((routeUrl,) <$> warnings) + new = Set.difference entries seen + in (Set.union seen entries, snd <$> Set.toAscList new) + forM_ fresh $ warn . ("unbound splice " <>) . formatWarning + -- Static files and Atom/JSON assets bypass the Heist render path, so + -- there is no template-substitution surface to lint here. + _ -> pass + where + warn detail = logW $ "Template lint on '" <> routeUrl <> "': " <> detail + +{- | Process-wide cache of @(route, splice)@ pairs already logged. Lives at the +rendering orchestration layer rather than inside 'Emanote.View.LintTemplate' +so the lint module stays a pure scanner. Bounded in practice by (number of +rendered routes) × (distinct splice typos in the user's templates), which is +small for any reasonable site — there is no eviction. +-} +{-# NOINLINE lintWarningCache #-} +lintWarningCache :: IORef (Set (Text, UnboundSplice)) +lintWarningCache = unsafePerformIO (newIORef mempty) + render :: (MonadIO m, MonadLoggerIO m) => Model -> SR.SiteRoute -> m (Ema.Asset LByteString) render m = \case SR.SiteRoute_MissingR urlPath -> do diff --git a/emanote/test/Emanote/View/LintTemplateSpec.hs b/emanote/test/Emanote/View/LintTemplateSpec.hs new file mode 100644 index 000000000..50f29c4e9 --- /dev/null +++ b/emanote/test/Emanote/View/LintTemplateSpec.hs @@ -0,0 +1,58 @@ +module Emanote.View.LintTemplateSpec where + +import Emanote.View.LintTemplate (UnboundSplice (..), formatWarning, scanRenderedHtml) +import Relude +import Test.Hspec + +spec :: Spec +spec = do + describe "scanRenderedHtml" $ do + it "ignores well-rendered HTML with no splice survivors" $ do + scanRenderedHtml "ok.html" "

hello

" + `shouldBe` Right [] + + it "reports an element whose tag name still has a Heist-style colon" $ do + scanRenderedHtml "ok.html" "" + `shouldBe` Right [SpliceElement "ema:tite"] + + it "reports nested unbound element splices" $ do + scanRenderedHtml "ok.html" "
x
" + `shouldBe` Right [SpliceElement "ema:foo"] + + it "reports a literal ${name} that survived as an attribute value" $ do + scanRenderedHtml "ok.html" "x" + `shouldBe` Right [SpliceAttribute "value:siteUrl"] + + it "reports multiple ${...} references in one attribute" $ do + scanRenderedHtml "ok.html" "\"${a}" + `shouldBe` Right [SpliceAttribute "a", SpliceAttribute "b"] + + it "deduplicates repeated occurrences across the document" $ do + scanRenderedHtml "ok.html" "" + `shouldBe` Right [SpliceElement "ema:foo"] + + it "leaves text nodes alone (e.g. ${...} appearing inside a
)" $ do
+      scanRenderedHtml "ok.html" "
${not-an-attr}
" + `shouldBe` Right [] + + it "still reports a valid ${...} that follows an unbalanced ${" $ do + scanRenderedHtml "ok.html" "x" + `shouldBe` Right [SpliceAttribute "valid"] + + it "ignores a trailing ${ with no closing brace" $ do + scanRenderedHtml "ok.html" "x" + `shouldBe` Right [SpliceAttribute "valid"] + + it "skips the parse entirely when the bytes have no splice marker" $ do + -- The pre-check short-circuits to Right [] without invoking parseHTML, + -- so even malformed HTML returns clean — there is nothing for the lint + -- to find when the bytes contain neither '${' nor a colon-tag. + scanRenderedHtml "bad.html" "
" + `shouldBe` Right [] + + describe "formatWarning" $ do + it "formats element warnings as a tag" $ do + formatWarning (SpliceElement "ema:tite") `shouldBe` "" + + it "formats attribute warnings as a dollar-brace token" $ do + formatWarning (SpliceAttribute "value:siteUrl") `shouldBe` "${value:siteUrl}"