From ac344d50e6ca1adb9c5481cb6fee715a6ca3196f Mon Sep 17 00:00:00 2001
From: Sia Ghelichkhan <ghelichkhani.siavash@gmail.com>
Date: Wed, 29 Apr 2026 11:52:05 +1000
Subject: [PATCH 01/11] Mass-conservative update for non-stiffly-accurate
 stage_value

For Dt(g(u)) with a nonlinear g (Richards moisture content theta(h) is
the canonical case), PR #204 already builds the conservative
two-evaluation form g(U_i) - g(u0) inside getFormStage, so the per-stage
equations are correct for any RK method. Mass conservation still failed
for non-stiffly-accurate methods like ImplicitMidpoint and
GaussLegendre(2) because _update_Ainv computes u_new as a linear
combination of u0 and the stage values. For linear Dt(u) that linear
combination is algebraically equivalent to the conservative time
integration, but for theta nonlinear it is not, and the conservation
property the stage equations establish is destroyed at the update step.

This routes the non-SA non-Bernstein path through a new conservative
update solve when the form has a composite Dt: solve
  replace(F_dtless, {u0: unew}) - replace(F_dtless, {u0: u0})
       + dt * sum_i b_i * F_remainder(stage_i) = 0
for u_new. For g = identity it collapses to the old
inner(unew - u0, v)*dx head, so the linear case is unchanged. For
nonlinear g it is a discrete statement of the global conservation
invariant and u_new satisfies it by construction. Linear Dt(u) keeps
_update_Ainv since that path also handles DAE structure correctly,
which the conservative variational head does not (the algebraic block
is left unconstrained, leading to a singular linear solve).

The composite-Dt detection lives in irksome/ufl/manipulation.py as the
public helper has_composite_time_derivative(F, u0). It walks
TimeDerivative nodes, exempting structural views (Indexed, ListTensor,
ComponentTensor), linear differential operators (Grad, Div, Curl), DG
facet operators (CellAvg, FacetAvg, restrictions), and arithmetic that
is linear in u0 (Sum, Product/Division with at most one operand
depending on u0). Pure-time forcing terms Dt(f(t,x)) are passed
straight through to the chain rule. The docstring carries an explicit
warning that detection is syntactic: pre-interpolating an expression
in u0 into a Function and writing Dt(that_function) loses the
dependence and silently produces a non-conservative discretisation.

The DIRK stage-derivative path folds the chain rule into the
K-substitution and has no clean conservative analogue without
restructuring. Rather than silently mis-discretising, DIRKTimeStepper
now raises NotImplementedError when the form has a composite Dt and
points users at stage_type=value. use_collocation_update=True
similarly refuses composite Dt because the collocation polynomials
terminal value is also a linear combination of stages.

New tests in tests/test_conservative_dt.py cover the seven implicit
tableaux Irksome ships (BackwardEuler, RadauIIA(2), LobattoIIIC(2),
Alexander, ImplicitMidpoint, GaussLegendre(2), QinZhang) on the
existing Richards problem with mass error below 1e-10 over twenty
steps; the DIRK and use_collocation_update refusals; the
linear-arithmetic classification matrix (Dt(2u), Dt(u/2),
Dt(u + sin(t)) classified linear; Dt(u*u), Dt(theta(u)), Dt(1/u)
classified composite); and identity-runs-and-decays sanity across
stage_value and DIRK. Targeted regression on test_dirk, test_dae,
test_explicit, test_galerkin, test_disc_galerkin, test_split,
test_time_form_splitting, test_mass_conservation, test_pc, test_bern,
test_butcher, test_differentiation, test_curl all pass: 243 passed,
2 skipped, 0 failed.
---
 irksome/dirk_stepper.py       |  15 ++
 irksome/stage_value.py        |  87 +++++++++---
 irksome/ufl/manipulation.py   | 108 ++++++++++++++-
 tests/test_conservative_dt.py | 251 ++++++++++++++++++++++++++++++++++
 4 files changed, 441 insertions(+), 20 deletions(-)
 create mode 100644 tests/test_conservative_dt.py

diff --git a/irksome/dirk_stepper.py b/irksome/dirk_stepper.py
index 13e01fe0..088f25e0 100644
--- a/irksome/dirk_stepper.py
+++ b/irksome/dirk_stepper.py
@@ -6,6 +6,7 @@
 from ufl.constantvalue import as_ufl
 
 from .ufl.deriv import TimeDerivative, expand_time_derivatives
+from .ufl.manipulation import has_composite_time_derivative
 from .constant import vecconst
 from .tools import replace
 from .constant import MeshConstant
@@ -79,6 +80,20 @@ def __init__(self, F, butcher_tableau, t, dt, u0, bcs=None, Fp=None,
                  **kwargs):
         assert butcher_tableau.is_diagonally_implicit
 
+        # The DIRK stage-derivative form chain-rules Dt(g(u)) into
+        # g'(u)*Dt(u), which is not mass-conservative for nonlinear g.
+        # Refuse such forms here rather than silently returning a
+        # non-conservative answer; users can fall back to
+        # ``stage_type="value"`` which builds the conservative
+        # two-evaluation discretisation in getFormStage.
+        if has_composite_time_derivative(F, u0):
+            raise NotImplementedError(
+                "DIRKTimeStepper cannot mass-conservatively discretise "
+                "Dt(g(u)) for a nonlinear function g of the prognostic "
+                "variable.  Use stage_type='value' instead, which builds "
+                "a conservative two-evaluation discretisation."
+            )
+
         self.num_steps = 0
         self.num_nonlinear_iterations = 0
         self.num_linear_iterations = 0
diff --git a/irksome/stage_value.py b/irksome/stage_value.py
index 176ffd00..30586b85 100644
--- a/irksome/stage_value.py
+++ b/irksome/stage_value.py
@@ -11,7 +11,9 @@
 from .bcs import stage2spaces4bc
 from .tableaux.ButcherTableaux import CollocationButcherTableau
 from .ufl.deriv import expand_time_derivatives
-from .ufl.manipulation import split_time_derivative_terms, remove_time_derivatives
+from .ufl.manipulation import (has_composite_time_derivative,
+                               split_time_derivative_terms,
+                               remove_time_derivatives)
 from .tools import AI, dot, reshape, replace
 from .constant import vecconst
 from .base_time_stepper import StageCoupledTimeStepper
@@ -199,6 +201,23 @@ def __init__(self, F, butcher_tableau, t, dt, u0, bcs=None,
             assert (self.basis_type is None or self.basis_type == "Lagrange"), "Collocation update requires the Lagrange form of the collocation polynomial"
             assert (len(set(nodes)) == self.butcher_tableau.num_stages + 1), "Need a non-confluent collocation method to use collocation update"
 
+            # The collocation update evaluates the collocation polynomial
+            # at t=1 as a linear combination of u0 and the stages.  That
+            # combination is not mass-conservative when Dt(g(u)) has a
+            # nonlinear g, because g(linear combo) != linear combo of
+            # g(stage_i).  Refuse rather than silently returning a
+            # non-conservative answer; users can disable use_collocation_update
+            # to fall back to the conservative variational update path.
+            if has_composite_time_derivative(F, u0):
+                raise NotImplementedError(
+                    "use_collocation_update=True is incompatible with "
+                    "Dt(g(u)) for a nonlinear g of the prognostic variable: "
+                    "the collocation polynomial's terminal value is a "
+                    "linear combination of stages and is not "
+                    "mass-conservative.  Disable use_collocation_update "
+                    "to use the conservative variational update."
+                )
+
             lag_basis = LagrangePolynomialSet(ufc_simplex(1), nodes)
             collocation_vander = vecconst(lag_basis.tabulate((1.0,))[(0,)])
 
@@ -206,15 +225,38 @@ def __init__(self, F, butcher_tableau, t, dt, u0, bcs=None,
             self._update = self._update_collocation
 
         elif (not butcher_tableau.is_stiffly_accurate) and (basis_type != "Bernstein"):
-            try:
-                A = butcher_tableau.A
-                b = butcher_tableau.b
-                self.bAinv = vecconst(numpy.linalg.solve(A.T, b))
-                self.update_scale = 1-numpy.sum(self.bAinv)
-                self._update = self._update_Ainv
-            except numpy.linalg.LinAlgError:
+            # For composite Dt(g(u)) we need the conservative variational
+            # update; the bAinv linear combination of stages is correct
+            # for g = identity but breaks for nonlinear g.  For the
+            # linear case we keep the bAinv shortcut: it is conservative
+            # by construction (g = id makes both formulations agree)
+            # AND it handles DAEs correctly, which the conservative
+            # variational update does not (it leaves the algebraic
+            # components of u_new unconstrained, producing a singular
+            # update Jacobian).
+            if has_composite_time_derivative(F, u0):
+                # Note: composite Dt(g(u)) on a DAE form (where some
+                # component of u0 has no time evolution) is not
+                # supported by this path -- the conservative variational
+                # update leaves the algebraic block unconstrained, and
+                # SNES will fail with a singular linear solve.  We do
+                # not detect this up-front because the available
+                # syntactic checks (is_ode etc.) are confused by the
+                # chain-rule promotion of Dt(u[i]) -> Dt(u)[i].  Stiffly
+                # accurate methods (RadauIIA, BackwardEuler) avoid this
+                # since u_new = U_s and no update solve is needed.
                 self.unew, self.update_solver = self.get_update_solver(update_solver_parameters)
                 self._update = self._update_general
+            else:
+                try:
+                    A = butcher_tableau.A
+                    b = butcher_tableau.b
+                    self.bAinv = vecconst(numpy.linalg.solve(A.T, b))
+                    self.update_scale = 1-numpy.sum(self.bAinv)
+                    self._update = self._update_Ainv
+                except numpy.linalg.LinAlgError:
+                    self.unew, self.update_solver = self.get_update_solver(update_solver_parameters)
+                    self._update = self._update_general
         else:
             self._update = self._update_stiff_acc
 
@@ -232,20 +274,33 @@ def _update_stiff_acc(self):
             u0bit.assign(self.stages.subfunctions[self.num_fields*(self.num_stages-1)+i])
 
     def get_update_solver(self, update_solver_parameters):
-        # only form update stuff if we need it
-        # which means neither stiffly accurate nor Vandermonde
-        v, = self.F.arguments()
-        unew = Function(self.u0.function_space())
-        Fupdate = inner(unew - self.u0, v) * dx
-
-        C = vecconst(self.butcher_tableau.c)
-        B = vecconst(self.butcher_tableau.b)
+        # Build a conservative variational update for u_new from the
+        # stage values.  The head is the same two-evaluation form used
+        # in the stage equations, evaluated at unew and u0:
+        #
+        #     replace(F_dtless, {u0: unew}) - replace(F_dtless, {u0: u0})
+        #
+        # which expands to inner(g(unew) - g(u0), v)*dx for the typical
+        # mass term inner(Dt(g(u)), v)*dx.  For g = identity this is
+        # exactly inner(unew - u0, v)*dx -- the previous head -- so the
+        # discrete update equation is unchanged in the linear case.
         F = self.F
         t = self.t
         dt = self.dt
         u0 = self.u0
+        unew = Function(u0.function_space())
+
         split_form = split_time_derivative_terms(F, t=t, timedep_coeffs=(u0,))
+        F_dtless = remove_time_derivatives(split_form.time)
         F_remainder = expand_time_derivatives(split_form.remainder, t=t, timedep_coeffs=())
+
+        # Two-evaluation conservative head.  Subtracting the two
+        # replaced forms keeps the test function v unchanged in both,
+        # so the result is a single Form valid for assembly.
+        Fupdate = replace(F_dtless, {u0: unew}) - replace(F_dtless, {u0: u0})
+
+        C = vecconst(self.butcher_tableau.c)
+        B = vecconst(self.butcher_tableau.b)
         u_np = to_value(self.u0, self.stages, self.vandermonde)
 
         for i in range(self.num_stages):
diff --git a/irksome/ufl/manipulation.py b/irksome/ufl/manipulation.py
index c1f9772c..5c4d7d08 100644
--- a/irksome/ufl/manipulation.py
+++ b/irksome/ufl/manipulation.py
@@ -11,19 +11,119 @@
 from operator import or_
 from typing import NamedTuple, Sequence, FrozenSet
 
+from ufl.algorithms.analysis import extract_type
 from ufl.corealg.traversal import traverse_unique_terminals
 from ufl.corealg.dag_traverser import DAGTraverser
 from ufl.classes import (
     BaseForm, CellAvg, Coefficient, ComponentTensor,
-    Conj, Cross, Derivative, Division, Dot, Expr, FacetAvg,
-    Form, FormSum, Indexed, IndexSum, Inner, Integral,
+    Conj, Cross, Curl, Derivative, Div, Division, Dot, Expr, FacetAvg,
+    Form, FormSum, Grad, Indexed, IndexSum, Inner, Integral,
     ListTensor, MultiIndex, NegativeRestricted, Outer, PositiveRestricted,
-    Product, Sum, Variable,
+    Product, ReferenceGrad, ReferenceValue, Sum, Variable,
 )
 
 from .deriv import TimeDerivative
 
-__all__ = ("SplitTimeForm", "check_integrals", "split_time_derivative_terms", "remove_time_derivatives")
+__all__ = ("SplitTimeForm", "check_integrals", "split_time_derivative_terms",
+           "remove_time_derivatives", "has_composite_time_derivative")
+
+
+# UFL classes that propagate the chain rule linearly through their
+# operands.  Mirrors the ``terminal_modifier`` rule list in
+# ``TimeDerivativeRuleset`` plus the structural compositors that build
+# vector- and mixed-function views (ListTensor, IndexSum, ComponentTensor)
+# and facet/restriction operators that appear in DG forms (Restricted,
+# CellAvg, FacetAvg).  A TimeDerivative whose operand is built only from
+# these classes wrapping the prognostic variable u0 is safe to chain-rule:
+# the result is ``Dt(u0)`` (or an Indexed view of it) and the standard
+# substitution in the steppers handles it correctly.
+_PASS_THROUGH_OPS = (
+    CellAvg, ComponentTensor, Conj, Curl, Div, FacetAvg, Grad, Indexed,
+    IndexSum, ListTensor, NegativeRestricted, PositiveRestricted,
+    ReferenceGrad, ReferenceValue, Variable,
+)
+
+
+def _depends_on(expr, u0):
+    """True iff ``u0`` appears as a terminal in ``expr``."""
+    return u0 in traverse_unique_terminals(expr)
+
+
+def _is_pass_through(f, u0):
+    """True iff ``f`` is u0 wrapped in a tree of UFL operations that are
+    linear in u0.  Used by :func:`has_composite_time_derivative` to
+    decide whether ``Dt(f)`` is safe to chain-rule (``True``) or
+    represents ``Dt(g(u0))`` for some nonlinear g (``False``).
+
+    Linear operations come in two flavours.  Structural / unary linear
+    ops (in ``_PASS_THROUGH_OPS``) propagate linearity through their
+    only operand.  Algebraic binary ops (``Sum``, ``Product``,
+    ``Division``) are linear under restricted conditions: a sum is
+    linear iff each summand is either u0-independent or itself linear
+    in u0; a product is linear iff at most one factor depends on u0
+    and that factor is itself linear; a quotient is linear iff its
+    denominator does not depend on u0 and the numerator is linear.
+    """
+    if f is u0:
+        return True
+    if isinstance(f, MultiIndex):
+        return True
+    if isinstance(f, _PASS_THROUGH_OPS):
+        return all(_is_pass_through(c, u0) for c in f.ufl_operands)
+    if isinstance(f, Sum):
+        return all((not _depends_on(c, u0)) or _is_pass_through(c, u0)
+                   for c in f.ufl_operands)
+    if isinstance(f, Product):
+        deps = [_depends_on(c, u0) for c in f.ufl_operands]
+        if sum(deps) > 1:
+            return False
+        return all((not dep) or _is_pass_through(c, u0)
+                   for c, dep in zip(f.ufl_operands, deps))
+    if isinstance(f, Division):
+        num, den = f.ufl_operands
+        if _depends_on(den, u0):
+            return False
+        return (not _depends_on(num, u0)) or _is_pass_through(num, u0)
+    return False
+
+
+def has_composite_time_derivative(F, u0):
+    """True iff ``F`` contains a TimeDerivative of a non-trivial expression
+    in u0 -- i.e. ``Dt(g(u0))`` for some nonlinear g.  These cases lose
+    mass conservation when chain-ruled through the stage-derivative form,
+    and require the conservative two-evaluation discretisation.
+
+    Cases that are NOT flagged:
+
+    * ``Dt(u0)``                     -- the prognostic variable itself
+    * ``Dt(u0[i])`` / ``Dt(split(u0))`` -- structural views for mixed/vector spaces
+    * ``Dt(grad(u0))`` and other linear differential operators
+    * ``Dt(u0('+'))`` and other facet restrictions
+    * ``Dt(c*u0)``, ``Dt(u0/c)``, ``Dt(u0 + h(t))`` -- u0 multiplied or
+      offset by anything that does not itself depend on u0
+    * ``Dt(f(t, x))``                -- operand does not involve u0 at all
+
+    .. warning::
+
+       The detection is purely *syntactic*: it checks whether ``u0``
+       appears as a UFL terminal under ``Dt``.  If a user creates an
+       intermediate :class:`~firedrake.Function` whose values were
+       interpolated from an expression in u0 and then writes
+       ``Dt(that_intermediate)``, the syntactic dependence on u0 is
+       lost and this function will declare the form safe.  The
+       resulting discretisation is *not* mass-conservative.  Always
+       wrap the symbolic expression directly in ``Dt`` (as
+       ``Dt(theta(u))``, not ``Dt(theta_function)``).
+    """
+    for td in extract_type(F, TimeDerivative):
+        f, = td.ufl_operands
+        if not _depends_on(f, u0):
+            # Dt(f(t,x)) -- chain-ruled analytically, no u0 dependence
+            continue
+        if _is_pass_through(f, u0):
+            continue
+        return True
+    return False
 
 
 class SplitTimeForm(NamedTuple):
diff --git a/tests/test_conservative_dt.py b/tests/test_conservative_dt.py
new file mode 100644
index 00000000..c96cfff4
--- /dev/null
+++ b/tests/test_conservative_dt.py
@@ -0,0 +1,251 @@
+"""Tests for conservative-by-default mass terms in stage_value.
+
+The contract:
+
+* For a form ``inner(Dt(g(u)), v)*dx + ...`` with a nonlinear g of the
+  prognostic variable u, the stage-value stepper must conserve total
+  mass to the nonlinear-solve tolerance, for SA *and* non-SA tableaux.
+
+* For g = identity (the default ``Dt(u)`` case) the discrete equations
+  must be unchanged from master.  This is covered by the existing test
+  suite -- see Group 5 / Regression preservation in the design notes.
+
+* DIRK stage-derivative form silently chain-rules ``Dt(g(u))`` and so
+  cannot be made mass-conservative without restructuring.  We refuse
+  it explicitly rather than returning a wrong answer.
+"""
+import pytest
+from firedrake import (
+    Constant, Function, FunctionSpace, TestFunction,
+    UnitIntervalMesh, UnitSquareMesh,
+    assemble, ds, dx, exp, grad, inner,
+)
+
+from irksome import (
+    Alexander, BackwardEuler, Dt, GaussLegendre, LobattoIIIC, MeshConstant,
+    QinZhang, RadauIIA, TimeStepper,
+)
+from irksome.ufl.manipulation import has_composite_time_derivative
+
+
+SOLVER = {
+    "snes_type": "newtonls",
+    "snes_rtol": 1e-12,
+    "snes_atol": 1e-14,
+    "ksp_type": "preonly",
+    "pc_type": "lu",
+    "mat_type": "aij",
+}
+
+
+def _theta(h, theta_r=Constant(0.15), theta_s=Constant(0.45),
+           alpha=Constant(0.328)):
+    return theta_r + (theta_s - theta_r) * exp(alpha * h)
+
+
+# -- Conservation correctness for stage_value, all implicit tableaux ------
+
+def _run_richards(scheme, steps=20, dt_val=100.0, **kwargs):
+    """Run the Richards problem with a flux BC; return cumulative
+    |mass-balance| error over the integration window."""
+    mesh = UnitSquareMesh(8, 8)
+    V = FunctionSpace(mesh, "CG", 1)
+    h = Function(V, name="h").assign(-1.0)
+    v = TestFunction(V)
+
+    MC = MeshConstant(mesh)
+    t = MC.Constant(0.0)
+    dt = MC.Constant(dt_val)
+    flux = Constant(1e-6)
+
+    F = inner(Dt(_theta(h)), v) * dx
+    F += inner(Constant(1e-5) * exp(Constant(0.328) * h) * grad(h), grad(v)) * dx
+    F -= inner(flux, v) * ds(4)
+
+    stepper = TimeStepper(F, scheme, t, dt, h,
+                          solver_parameters=SOLVER, **kwargs)
+
+    one = Function(V).interpolate(Constant(1.0))
+    mform = inner(_theta(h), one) * dx
+    area = assemble(1 * ds(4, domain=mesh))
+
+    prev = assemble(mform)
+    err = 0.0
+    for _ in range(steps):
+        expected = prev + area * float(flux) * float(dt)
+        stepper.advance()
+        t.assign(float(t) + float(dt))
+        cur = assemble(mform)
+        err += abs(cur - expected)
+        prev = cur
+    return err
+
+
+# All implicit tableaux Irksome ships, exercised through the stage-value
+# stepper which builds the conservative two-evaluation discretisation.
+@pytest.mark.parametrize("scheme", [
+    BackwardEuler(),    # SA, single-stage, was already passing on master
+    RadauIIA(2),        # SA, multi-stage, was already passing on master
+    LobattoIIIC(2),     # SA, 2-stage; exercises _update_stiff_acc
+    Alexander(),        # SA, 3-stage, has negative b_2 = -0.64 -- regression case
+    GaussLegendre(1),   # non-SA, single-stage (=ImplicitMidpoint)
+    GaussLegendre(2),   # non-SA, 2-stage fully implicit
+    QinZhang(),         # non-SA, 2-stage DIRK structure
+], ids=["BackwardEuler", "RadauIIA2", "LobattoIIIC2", "Alexander",
+        "ImplicitMidpoint", "GaussLegendre2", "QinZhang"])
+def test_conservation_stage_value(scheme):
+    err = _run_richards(scheme, stage_type="value")
+    assert err < 1e-10, (
+        f"Cumulative mass error {err:.3e} > 1e-10 for "
+        f"{type(scheme).__name__} via stage_type='value'."
+    )
+
+
+# -- DIRK refuses composite Dt rather than silently mis-discretising ------
+
+@pytest.mark.parametrize("scheme", [Alexander(), QinZhang(), GaussLegendre(1)],
+                         ids=["Alexander", "QinZhang", "ImplicitMidpoint"])
+def test_dirk_refuses_composite_dt(scheme):
+    """DIRK stage-derivative chain-rules Dt(g(u)) and is not mass-conservative
+    for nonlinear g.  It must refuse such forms explicitly."""
+    mesh = UnitSquareMesh(4, 4)
+    V = FunctionSpace(mesh, "CG", 1)
+    h = Function(V).assign(-1.0)
+    v = TestFunction(V)
+    MC = MeshConstant(mesh)
+    t = MC.Constant(0.0)
+    dt = MC.Constant(0.1)
+
+    F = inner(Dt(_theta(h)), v) * dx + inner(grad(h), grad(v)) * dx
+
+    with pytest.raises(NotImplementedError, match="stage_type='value'"):
+        TimeStepper(F, scheme, t, dt, h, stage_type="dirk",
+                    solver_parameters=SOLVER)
+
+
+def test_dirk_accepts_linear_dt():
+    """The DIRK guard must NOT trigger on the standard Dt(u) case.
+    This is the regression check that ensures we haven't broken the
+    common case while adding the composite-Dt refusal."""
+    mesh = UnitIntervalMesh(8)
+    V = FunctionSpace(mesh, "CG", 1)
+    u = Function(V)
+    v = TestFunction(V)
+    MC = MeshConstant(mesh)
+    t = MC.Constant(0.0)
+    dt = MC.Constant(0.1)
+
+    F = inner(Dt(u), v) * dx + inner(grad(u), grad(v)) * dx
+
+    stepper = TimeStepper(F, QinZhang(), t, dt, u, stage_type="dirk",
+                          solver_parameters=SOLVER)
+    stepper.advance()  # must not raise
+
+
+# -- Collocation update also refuses composite Dt -------------------------
+
+def test_pass_through_handles_linear_arithmetic():
+    """has_composite_time_derivative must NOT flag Dt(c*u), Dt(u/c),
+    or Dt(u + h(t)) as composite -- they're all linear in u and the
+    chain rule (or the linear stage_value path) handles them correctly.
+    Falsely flagging them routes through the conservative variational
+    head, which produces a singular Jacobian for DAEs."""
+    from ufl import sin
+    mesh = UnitIntervalMesh(4)
+    V = FunctionSpace(mesh, "CG", 1)
+    u = Function(V)
+    t = Constant(0.0)
+
+    # Linear arithmetic forms that must be classified as not-composite.
+    forms = {
+        "Dt(2*u)":      Dt(Constant(2.0) * u),
+        "Dt(u/2)":      Dt(u / Constant(2.0)),
+        "Dt(u + sin(t))": Dt(u + sin(t)),
+        "Dt(2*u + 3)":  Dt(Constant(2.0) * u + Constant(3.0)),
+    }
+    v = TestFunction(V)
+    for name, expr in forms.items():
+        F = inner(expr, v) * dx
+        assert not has_composite_time_derivative(F, u), (
+            f"{name} was incorrectly flagged as composite -- the "
+            "linear stage_value path would be skipped, breaking DAEs."
+        )
+
+    # Genuinely nonlinear cases must still be flagged.
+    nonlinear = {
+        "Dt(u*u)":   Dt(u * u),
+        "Dt(theta(u))": Dt(_theta(u)),
+        "Dt(1/u)":   Dt(Constant(1.0) / u),
+    }
+    for name, expr in nonlinear.items():
+        F = inner(expr, v) * dx
+        assert has_composite_time_derivative(F, u), (
+            f"{name} was missed -- this would silently produce a "
+            "non-conservative discretisation."
+        )
+
+
+def test_collocation_update_refuses_composite_dt():
+    """use_collocation_update produces u_new as a linear combination of
+    stages, which is not conservative for nonlinear g.  Refuse it."""
+    mesh = UnitSquareMesh(4, 4)
+    V = FunctionSpace(mesh, "CG", 1)
+    h = Function(V).assign(-1.0)
+    v = TestFunction(V)
+    MC = MeshConstant(mesh)
+    t = MC.Constant(0.0)
+    dt = MC.Constant(0.1)
+
+    F = inner(Dt(_theta(h)), v) * dx + inner(grad(h), grad(v)) * dx
+
+    with pytest.raises(NotImplementedError, match="use_collocation_update"):
+        TimeStepper(F, GaussLegendre(2), t, dt, h, stage_type="value",
+                    use_collocation_update=True,
+                    solver_parameters=SOLVER)
+
+
+# -- Identity regression: linear Dt(u) still works for every stepper -----
+
+def _run_heat_identity(scheme, steps=4, dt_val=0.05, **kwargs):
+    """Linear heat: Dt(u) = Δu, return final u dofs.  A baseline sanity
+    check that nothing has been silently broken for g = identity."""
+    import numpy as np
+    from ufl import sin, pi
+    mesh = UnitIntervalMesh(8)
+    V = FunctionSpace(mesh, "CG", 1)
+    u = Function(V, name="u")
+    x, = mesh.coordinates
+    u.interpolate(sin(pi * x))
+    v = TestFunction(V)
+    MC = MeshConstant(mesh)
+    t = MC.Constant(0.0)
+    dt = MC.Constant(dt_val)
+
+    F = inner(Dt(u), v) * dx + inner(grad(u), grad(v)) * dx
+
+    stepper = TimeStepper(F, scheme, t, dt, u,
+                          solver_parameters=SOLVER, **kwargs)
+    for _ in range(steps):
+        stepper.advance()
+        t.assign(float(t) + float(dt))
+    return np.linalg.norm(u.dat.data_ro)
+
+
+@pytest.mark.parametrize("scheme,kw", [
+    (BackwardEuler(),  {"stage_type": "value"}),
+    (GaussLegendre(1), {"stage_type": "value"}),
+    (QinZhang(),       {"stage_type": "value"}),
+    (RadauIIA(2),      {"stage_type": "value"}),
+    (BackwardEuler(),  {"stage_type": "dirk"}),
+    (QinZhang(),       {"stage_type": "dirk"}),
+    (Alexander(),      {"stage_type": "dirk"}),
+], ids=["BE-value", "GL1-value", "QinZhang-value", "RadauIIA2-value",
+        "BE-dirk", "QinZhang-dirk", "Alexander-dirk"])
+def test_identity_runs_and_decays(scheme, kw):
+    """g = identity: every stepper must run, and the heat solution must
+    decay sensibly.  Catches accidental semantic changes for the linear
+    case across the steppers we touched."""
+    norm = _run_heat_identity(scheme, **kw)
+    assert 0.0 < norm < 5.0, (
+        f"Heat norm out of range for {type(scheme).__name__} {kw}: {norm:.3e}"
+    )

From f2ac418c2ce9b42e143993f85555bd47d1f59b02 Mon Sep 17 00:00:00 2001
From: Sia Ghelichkhan <ghelichkhani.siavash@gmail.com>
Date: Mon, 11 May 2026 11:56:57 +1000
Subject: [PATCH 02/11] Updating solver vlaues for testing

---
 irksome/stage_value.py | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/irksome/stage_value.py b/irksome/stage_value.py
index 30586b85..3a4627bd 100644
--- a/irksome/stage_value.py
+++ b/irksome/stage_value.py
@@ -190,6 +190,14 @@ def __init__(self, F, butcher_tableau, t, dt, u0, bcs=None,
                          splitting=splitting, butcher_tableau=butcher_tableau, bounds=bounds,
                          **kwargs)
 
+        # Conservative variational update inherits the stage solve's
+        # parameters by default: same form structure, same Jacobian
+        # sparsity, so the same preconditioner is the right starting
+        # point.  Callers who want a different setup can pass
+        # update_solver_parameters explicitly.
+        if update_solver_parameters is None:
+            update_solver_parameters = solver_parameters
+
         self.set_initial_guess()
 
         if use_collocation_update:
@@ -325,6 +333,13 @@ def get_update_solver(self, update_solver_parameters):
         return unew, update_solver
 
     def _update_general(self):
+        # Seed unew with u0 before solving.  The conservative head's
+        # Jacobian is the moisture-capacity-weighted mass matrix
+        # C(unew)*v*phi*dx, and for soils like Haverkamp the capacity
+        # vanishes at h = 0 -- so a fresh Function (zero-initialised)
+        # gives a singular initial Jacobian and SNES fails before
+        # taking any Newton step.  u0 is the natural warm start anyway.
+        self.unew.assign(self.u0)
         self.update_solver.solve()
         self.u0.assign(self.unew)
 

From dc1d0729a9d6b4231443d31c328fd6418b2ccfc8 Mon Sep 17 00:00:00 2001
From: Sia Ghelichkhan <ghelichkhani.siavash@gmail.com>
Date: Wed, 13 May 2026 16:45:02 +1000
Subject: [PATCH 03/11] Drop DIRK and collocation-update refusal for composite
 Dt

The auto-detection PR initially refused composite Dt(g(u)) in the DIRK
stage-derivative path and in stage_value with use_collocation_update=True,
since both produce a u_new that is a linear combination of stages and is
therefore not mass-conservative for nonlinear g.

That refusal sat outside the scope of this PR (which is about making
non-stiffly-accurate stage_value mass-conservative) and was a behaviour
change for users who knew about the non-conservation in DIRK and were
prepared to live with it.  Dropping the guards keeps this PR focused on
the one diagnosis it is making and lets the DIRK question be handled
separately if and when we choose to fix or warn there.

The corresponding two refusal tests are removed; the
linear-arithmetic classification matrix and the
mass-conservation/identity regressions remain.
---
 irksome/dirk_stepper.py       | 15 --------
 irksome/stage_value.py        | 18 ----------
 tests/test_conservative_dt.py | 66 +----------------------------------
 3 files changed, 1 insertion(+), 98 deletions(-)

diff --git a/irksome/dirk_stepper.py b/irksome/dirk_stepper.py
index 088f25e0..13e01fe0 100644
--- a/irksome/dirk_stepper.py
+++ b/irksome/dirk_stepper.py
@@ -6,7 +6,6 @@
 from ufl.constantvalue import as_ufl
 
 from .ufl.deriv import TimeDerivative, expand_time_derivatives
-from .ufl.manipulation import has_composite_time_derivative
 from .constant import vecconst
 from .tools import replace
 from .constant import MeshConstant
@@ -80,20 +79,6 @@ def __init__(self, F, butcher_tableau, t, dt, u0, bcs=None, Fp=None,
                  **kwargs):
         assert butcher_tableau.is_diagonally_implicit
 
-        # The DIRK stage-derivative form chain-rules Dt(g(u)) into
-        # g'(u)*Dt(u), which is not mass-conservative for nonlinear g.
-        # Refuse such forms here rather than silently returning a
-        # non-conservative answer; users can fall back to
-        # ``stage_type="value"`` which builds the conservative
-        # two-evaluation discretisation in getFormStage.
-        if has_composite_time_derivative(F, u0):
-            raise NotImplementedError(
-                "DIRKTimeStepper cannot mass-conservatively discretise "
-                "Dt(g(u)) for a nonlinear function g of the prognostic "
-                "variable.  Use stage_type='value' instead, which builds "
-                "a conservative two-evaluation discretisation."
-            )
-
         self.num_steps = 0
         self.num_nonlinear_iterations = 0
         self.num_linear_iterations = 0
diff --git a/irksome/stage_value.py b/irksome/stage_value.py
index 12f1956b..c608b217 100644
--- a/irksome/stage_value.py
+++ b/irksome/stage_value.py
@@ -199,24 +199,6 @@ def __init__(self, F, butcher_tableau, t, dt, u0, bcs=None,
         if use_collocation_update:
             # Use the terminal value of the collocation polynomial to update the solution. Note: collocation update is only implemented for constant-in-time boundary conditions.
             # TODO: create an assertion to check for constant-in-time boundary conditions.
-
-            # The collocation update evaluates the collocation polynomial
-            # at t=1 as a linear combination of u0 and the stages.  That
-            # combination is not mass-conservative when Dt(g(u)) has a
-            # nonlinear g, because g(linear combo) != linear combo of
-            # g(stage_i).  Refuse rather than silently returning a
-            # non-conservative answer; users can disable use_collocation_update
-            # to fall back to the conservative variational update path.
-            if has_composite_time_derivative(F, u0):
-                raise NotImplementedError(
-                    "use_collocation_update=True is incompatible with "
-                    "Dt(g(u)) for a nonlinear g of the prognostic variable: "
-                    "the collocation polynomial's terminal value is a "
-                    "linear combination of stages and is not "
-                    "mass-conservative.  Disable use_collocation_update "
-                    "to use the conservative variational update."
-                )
-
             self.collocation_vander = self.tabulate_poly((1.0,))
             self._update = self._update_collocation
 
diff --git a/tests/test_conservative_dt.py b/tests/test_conservative_dt.py
index c96cfff4..e8a6670c 100644
--- a/tests/test_conservative_dt.py
+++ b/tests/test_conservative_dt.py
@@ -9,10 +9,6 @@
 * For g = identity (the default ``Dt(u)`` case) the discrete equations
   must be unchanged from master.  This is covered by the existing test
   suite -- see Group 5 / Regression preservation in the design notes.
-
-* DIRK stage-derivative form silently chain-rules ``Dt(g(u))`` and so
-  cannot be made mass-conservative without restructuring.  We refuse
-  it explicitly rather than returning a wrong answer.
 """
 import pytest
 from firedrake import (
@@ -101,48 +97,7 @@ def test_conservation_stage_value(scheme):
     )
 
 
-# -- DIRK refuses composite Dt rather than silently mis-discretising ------
-
-@pytest.mark.parametrize("scheme", [Alexander(), QinZhang(), GaussLegendre(1)],
-                         ids=["Alexander", "QinZhang", "ImplicitMidpoint"])
-def test_dirk_refuses_composite_dt(scheme):
-    """DIRK stage-derivative chain-rules Dt(g(u)) and is not mass-conservative
-    for nonlinear g.  It must refuse such forms explicitly."""
-    mesh = UnitSquareMesh(4, 4)
-    V = FunctionSpace(mesh, "CG", 1)
-    h = Function(V).assign(-1.0)
-    v = TestFunction(V)
-    MC = MeshConstant(mesh)
-    t = MC.Constant(0.0)
-    dt = MC.Constant(0.1)
-
-    F = inner(Dt(_theta(h)), v) * dx + inner(grad(h), grad(v)) * dx
-
-    with pytest.raises(NotImplementedError, match="stage_type='value'"):
-        TimeStepper(F, scheme, t, dt, h, stage_type="dirk",
-                    solver_parameters=SOLVER)
-
-
-def test_dirk_accepts_linear_dt():
-    """The DIRK guard must NOT trigger on the standard Dt(u) case.
-    This is the regression check that ensures we haven't broken the
-    common case while adding the composite-Dt refusal."""
-    mesh = UnitIntervalMesh(8)
-    V = FunctionSpace(mesh, "CG", 1)
-    u = Function(V)
-    v = TestFunction(V)
-    MC = MeshConstant(mesh)
-    t = MC.Constant(0.0)
-    dt = MC.Constant(0.1)
-
-    F = inner(Dt(u), v) * dx + inner(grad(u), grad(v)) * dx
-
-    stepper = TimeStepper(F, QinZhang(), t, dt, u, stage_type="dirk",
-                          solver_parameters=SOLVER)
-    stepper.advance()  # must not raise
-
-
-# -- Collocation update also refuses composite Dt -------------------------
+# -- has_composite_time_derivative classification -------------------------
 
 def test_pass_through_handles_linear_arithmetic():
     """has_composite_time_derivative must NOT flag Dt(c*u), Dt(u/c),
@@ -185,25 +140,6 @@ def test_pass_through_handles_linear_arithmetic():
         )
 
 
-def test_collocation_update_refuses_composite_dt():
-    """use_collocation_update produces u_new as a linear combination of
-    stages, which is not conservative for nonlinear g.  Refuse it."""
-    mesh = UnitSquareMesh(4, 4)
-    V = FunctionSpace(mesh, "CG", 1)
-    h = Function(V).assign(-1.0)
-    v = TestFunction(V)
-    MC = MeshConstant(mesh)
-    t = MC.Constant(0.0)
-    dt = MC.Constant(0.1)
-
-    F = inner(Dt(_theta(h)), v) * dx + inner(grad(h), grad(v)) * dx
-
-    with pytest.raises(NotImplementedError, match="use_collocation_update"):
-        TimeStepper(F, GaussLegendre(2), t, dt, h, stage_type="value",
-                    use_collocation_update=True,
-                    solver_parameters=SOLVER)
-
-
 # -- Identity regression: linear Dt(u) still works for every stepper -----
 
 def _run_heat_identity(scheme, steps=4, dt_val=0.05, **kwargs):

From 5a54f50ad431062ef0f0f2b7d6ea26233b57de77 Mon Sep 17 00:00:00 2001
From: Sia Ghelichkhan <ghelichkhani.siavash@gmail.com>
Date: Wed, 13 May 2026 17:16:15 +1000
Subject: [PATCH 04/11] Test reorg: extend test_mass_conservation, isolate
 classifier test

Drop tests/test_conservative_dt.py and route its content into the
appropriate existing places.

* tests/test_mass_conservation.py already runs the exponential Richards
  problem with a flux BC and asserts mass error near machine precision.
  Add GaussLegendre(1) (= ImplicitMidpoint), GaussLegendre(2) and
  QinZhang to its parametrize list.  These three tableaux are exactly
  the ones that exercise the new conservative variational update path
  for non-stiffly-accurate stage_value -- they fail on master with mass
  errors of 4e-9 to 9e-8 (the linear-combination update destroying the
  conservation property the stage equations build for nonlinear theta)
  and sit at machine precision on this branch.

* tests/test_has_composite_time_derivative.py is a new standalone test
  for the public helper introduced in irksome/ufl/manipulation.py.
  Two cases: linear-arithmetic forms (Dt(c*u), Dt(u/c), Dt(u + sin(t)))
  must not be flagged, otherwise the conservative variational head
  would replace the linear-combination update and break DAEs; truly
  nonlinear forms (Dt(u*u), Dt(theta(u)), Dt(1/u)) must be flagged,
  otherwise non-SA stage_value would silently produce a
  non-conservative discretisation.

The identity-regression heat tests previously in test_conservative_dt
are dropped as redundant: the existing test_dirk, test_galerkin,
test_disc_galerkin and similar suites already exercise linear forms
across every tableau the project ships.
---
 tests/test_conservative_dt.py               | 187 --------------------
 tests/test_has_composite_time_derivative.py |  75 ++++++++
 tests/test_mass_conservation.py             |  20 ++-
 3 files changed, 92 insertions(+), 190 deletions(-)
 delete mode 100644 tests/test_conservative_dt.py
 create mode 100644 tests/test_has_composite_time_derivative.py

diff --git a/tests/test_conservative_dt.py b/tests/test_conservative_dt.py
deleted file mode 100644
index e8a6670c..00000000
--- a/tests/test_conservative_dt.py
+++ /dev/null
@@ -1,187 +0,0 @@
-"""Tests for conservative-by-default mass terms in stage_value.
-
-The contract:
-
-* For a form ``inner(Dt(g(u)), v)*dx + ...`` with a nonlinear g of the
-  prognostic variable u, the stage-value stepper must conserve total
-  mass to the nonlinear-solve tolerance, for SA *and* non-SA tableaux.
-
-* For g = identity (the default ``Dt(u)`` case) the discrete equations
-  must be unchanged from master.  This is covered by the existing test
-  suite -- see Group 5 / Regression preservation in the design notes.
-"""
-import pytest
-from firedrake import (
-    Constant, Function, FunctionSpace, TestFunction,
-    UnitIntervalMesh, UnitSquareMesh,
-    assemble, ds, dx, exp, grad, inner,
-)
-
-from irksome import (
-    Alexander, BackwardEuler, Dt, GaussLegendre, LobattoIIIC, MeshConstant,
-    QinZhang, RadauIIA, TimeStepper,
-)
-from irksome.ufl.manipulation import has_composite_time_derivative
-
-
-SOLVER = {
-    "snes_type": "newtonls",
-    "snes_rtol": 1e-12,
-    "snes_atol": 1e-14,
-    "ksp_type": "preonly",
-    "pc_type": "lu",
-    "mat_type": "aij",
-}
-
-
-def _theta(h, theta_r=Constant(0.15), theta_s=Constant(0.45),
-           alpha=Constant(0.328)):
-    return theta_r + (theta_s - theta_r) * exp(alpha * h)
-
-
-# -- Conservation correctness for stage_value, all implicit tableaux ------
-
-def _run_richards(scheme, steps=20, dt_val=100.0, **kwargs):
-    """Run the Richards problem with a flux BC; return cumulative
-    |mass-balance| error over the integration window."""
-    mesh = UnitSquareMesh(8, 8)
-    V = FunctionSpace(mesh, "CG", 1)
-    h = Function(V, name="h").assign(-1.0)
-    v = TestFunction(V)
-
-    MC = MeshConstant(mesh)
-    t = MC.Constant(0.0)
-    dt = MC.Constant(dt_val)
-    flux = Constant(1e-6)
-
-    F = inner(Dt(_theta(h)), v) * dx
-    F += inner(Constant(1e-5) * exp(Constant(0.328) * h) * grad(h), grad(v)) * dx
-    F -= inner(flux, v) * ds(4)
-
-    stepper = TimeStepper(F, scheme, t, dt, h,
-                          solver_parameters=SOLVER, **kwargs)
-
-    one = Function(V).interpolate(Constant(1.0))
-    mform = inner(_theta(h), one) * dx
-    area = assemble(1 * ds(4, domain=mesh))
-
-    prev = assemble(mform)
-    err = 0.0
-    for _ in range(steps):
-        expected = prev + area * float(flux) * float(dt)
-        stepper.advance()
-        t.assign(float(t) + float(dt))
-        cur = assemble(mform)
-        err += abs(cur - expected)
-        prev = cur
-    return err
-
-
-# All implicit tableaux Irksome ships, exercised through the stage-value
-# stepper which builds the conservative two-evaluation discretisation.
-@pytest.mark.parametrize("scheme", [
-    BackwardEuler(),    # SA, single-stage, was already passing on master
-    RadauIIA(2),        # SA, multi-stage, was already passing on master
-    LobattoIIIC(2),     # SA, 2-stage; exercises _update_stiff_acc
-    Alexander(),        # SA, 3-stage, has negative b_2 = -0.64 -- regression case
-    GaussLegendre(1),   # non-SA, single-stage (=ImplicitMidpoint)
-    GaussLegendre(2),   # non-SA, 2-stage fully implicit
-    QinZhang(),         # non-SA, 2-stage DIRK structure
-], ids=["BackwardEuler", "RadauIIA2", "LobattoIIIC2", "Alexander",
-        "ImplicitMidpoint", "GaussLegendre2", "QinZhang"])
-def test_conservation_stage_value(scheme):
-    err = _run_richards(scheme, stage_type="value")
-    assert err < 1e-10, (
-        f"Cumulative mass error {err:.3e} > 1e-10 for "
-        f"{type(scheme).__name__} via stage_type='value'."
-    )
-
-
-# -- has_composite_time_derivative classification -------------------------
-
-def test_pass_through_handles_linear_arithmetic():
-    """has_composite_time_derivative must NOT flag Dt(c*u), Dt(u/c),
-    or Dt(u + h(t)) as composite -- they're all linear in u and the
-    chain rule (or the linear stage_value path) handles them correctly.
-    Falsely flagging them routes through the conservative variational
-    head, which produces a singular Jacobian for DAEs."""
-    from ufl import sin
-    mesh = UnitIntervalMesh(4)
-    V = FunctionSpace(mesh, "CG", 1)
-    u = Function(V)
-    t = Constant(0.0)
-
-    # Linear arithmetic forms that must be classified as not-composite.
-    forms = {
-        "Dt(2*u)":      Dt(Constant(2.0) * u),
-        "Dt(u/2)":      Dt(u / Constant(2.0)),
-        "Dt(u + sin(t))": Dt(u + sin(t)),
-        "Dt(2*u + 3)":  Dt(Constant(2.0) * u + Constant(3.0)),
-    }
-    v = TestFunction(V)
-    for name, expr in forms.items():
-        F = inner(expr, v) * dx
-        assert not has_composite_time_derivative(F, u), (
-            f"{name} was incorrectly flagged as composite -- the "
-            "linear stage_value path would be skipped, breaking DAEs."
-        )
-
-    # Genuinely nonlinear cases must still be flagged.
-    nonlinear = {
-        "Dt(u*u)":   Dt(u * u),
-        "Dt(theta(u))": Dt(_theta(u)),
-        "Dt(1/u)":   Dt(Constant(1.0) / u),
-    }
-    for name, expr in nonlinear.items():
-        F = inner(expr, v) * dx
-        assert has_composite_time_derivative(F, u), (
-            f"{name} was missed -- this would silently produce a "
-            "non-conservative discretisation."
-        )
-
-
-# -- Identity regression: linear Dt(u) still works for every stepper -----
-
-def _run_heat_identity(scheme, steps=4, dt_val=0.05, **kwargs):
-    """Linear heat: Dt(u) = Δu, return final u dofs.  A baseline sanity
-    check that nothing has been silently broken for g = identity."""
-    import numpy as np
-    from ufl import sin, pi
-    mesh = UnitIntervalMesh(8)
-    V = FunctionSpace(mesh, "CG", 1)
-    u = Function(V, name="u")
-    x, = mesh.coordinates
-    u.interpolate(sin(pi * x))
-    v = TestFunction(V)
-    MC = MeshConstant(mesh)
-    t = MC.Constant(0.0)
-    dt = MC.Constant(dt_val)
-
-    F = inner(Dt(u), v) * dx + inner(grad(u), grad(v)) * dx
-
-    stepper = TimeStepper(F, scheme, t, dt, u,
-                          solver_parameters=SOLVER, **kwargs)
-    for _ in range(steps):
-        stepper.advance()
-        t.assign(float(t) + float(dt))
-    return np.linalg.norm(u.dat.data_ro)
-
-
-@pytest.mark.parametrize("scheme,kw", [
-    (BackwardEuler(),  {"stage_type": "value"}),
-    (GaussLegendre(1), {"stage_type": "value"}),
-    (QinZhang(),       {"stage_type": "value"}),
-    (RadauIIA(2),      {"stage_type": "value"}),
-    (BackwardEuler(),  {"stage_type": "dirk"}),
-    (QinZhang(),       {"stage_type": "dirk"}),
-    (Alexander(),      {"stage_type": "dirk"}),
-], ids=["BE-value", "GL1-value", "QinZhang-value", "RadauIIA2-value",
-        "BE-dirk", "QinZhang-dirk", "Alexander-dirk"])
-def test_identity_runs_and_decays(scheme, kw):
-    """g = identity: every stepper must run, and the heat solution must
-    decay sensibly.  Catches accidental semantic changes for the linear
-    case across the steppers we touched."""
-    norm = _run_heat_identity(scheme, **kw)
-    assert 0.0 < norm < 5.0, (
-        f"Heat norm out of range for {type(scheme).__name__} {kw}: {norm:.3e}"
-    )
diff --git a/tests/test_has_composite_time_derivative.py b/tests/test_has_composite_time_derivative.py
new file mode 100644
index 00000000..30b11da0
--- /dev/null
+++ b/tests/test_has_composite_time_derivative.py
@@ -0,0 +1,75 @@
+"""Classification contract for ``has_composite_time_derivative``.
+
+The helper is what routes stage_value between the existing
+linear-combination update (``_update_Ainv``) and the new conservative
+variational update.  Misclassification has two failure modes:
+
+* False negative on a nonlinear ``Dt(g(u))`` would silently drop the form
+  back onto the linear-combination update and lose mass conservation for
+  non-stiffly-accurate tableaux.
+
+* False positive on a linear ``Dt(c*u)`` or ``Dt(u + f(t))`` would route
+  the form through the conservative variational head, which has no
+  algebraic block and breaks DAEs.
+
+Pin both directions of the contract here so a future change to the
+walker cannot quietly regress either case.
+"""
+import pytest
+from firedrake import (
+    Constant, Function, FunctionSpace, TestFunction,
+    UnitIntervalMesh, dx, exp, inner,
+)
+from ufl import sin
+
+from irksome import Dt
+from irksome.ufl.manipulation import has_composite_time_derivative
+
+
+def _theta(h, theta_r=Constant(0.15), theta_s=Constant(0.45),
+           alpha=Constant(0.328)):
+    """Exponential soil moisture, the canonical nonlinear g(h) we care about."""
+    return theta_r + (theta_s - theta_r) * exp(alpha * h)
+
+
+@pytest.fixture
+def setup():
+    mesh = UnitIntervalMesh(4)
+    V = FunctionSpace(mesh, "CG", 1)
+    return V, Function(V), TestFunction(V), Constant(0.0)
+
+
+def test_linear_arithmetic_is_not_composite(setup):
+    """Constant-coefficient scalings of u and additive time forcings must
+    not be flagged.  These are linear in u; ``_update_Ainv`` is correct
+    for them and is also the path that handles DAE structure."""
+    V, u, v, t = setup
+    forms = {
+        "Dt(2*u)":         Dt(Constant(2.0) * u),
+        "Dt(u/2)":         Dt(u / Constant(2.0)),
+        "Dt(u + sin(t))":  Dt(u + sin(t)),
+        "Dt(2*u + 3)":     Dt(Constant(2.0) * u + Constant(3.0)),
+    }
+    for name, expr in forms.items():
+        F = inner(expr, v) * dx
+        assert not has_composite_time_derivative(F, u), (
+            f"{name} was incorrectly flagged as composite -- the linear "
+            "stage_value path would be skipped, breaking DAEs."
+        )
+
+
+def test_nonlinear_is_composite(setup):
+    """Genuinely nonlinear g(u) inside Dt must be flagged so that
+    stage_value routes through the conservative variational update."""
+    V, u, v, t = setup
+    nonlinear = {
+        "Dt(u*u)":      Dt(u * u),
+        "Dt(theta(u))": Dt(_theta(u)),
+        "Dt(1/u)":      Dt(Constant(1.0) / u),
+    }
+    for name, expr in nonlinear.items():
+        F = inner(expr, v) * dx
+        assert has_composite_time_derivative(F, u), (
+            f"{name} was missed -- this would silently produce a "
+            "non-conservative discretisation for non-SA stage_value."
+        )
diff --git a/tests/test_mass_conservation.py b/tests/test_mass_conservation.py
index 68c32721..3ea24c26 100644
--- a/tests/test_mass_conservation.py
+++ b/tests/test_mass_conservation.py
@@ -11,7 +11,10 @@
     Constant, Function, FunctionSpace, TestFunction,
     UnitSquareMesh, assemble, ds, dx, exp, grad, inner,
 )
-from irksome import BackwardEuler, DiscontinuousGalerkinScheme, Dt, RadauIIA, TimeStepper, BDF, AdamsMoulton, MultistepTableau
+from irksome import (
+    AdamsMoulton, BackwardEuler, BDF, DiscontinuousGalerkinScheme, Dt,
+    GaussLegendre, MultistepTableau, QinZhang, RadauIIA, TimeStepper,
+)
 import numpy as np
 
 
@@ -80,8 +83,19 @@ def run_richards(scheme, **kwargs):
     return mean_error
 
 
-@pytest.mark.parametrize("scheme", [BackwardEuler(), RadauIIA(2), BDF(1), AdamsMoulton(0), AdamsMoulton(1), AdamsMoulton(2)],
-                         ids=["BackwardEuler", "RadauIIA2", "BDF1", "AM0", "AM1", "AM2"])
+# The three non-SA tableaux at the end (GaussLegendre(1) = ImplicitMidpoint,
+# GaussLegendre(2), QinZhang) exercise the conservative variational update
+# path introduced for non-stiffly-accurate stage_value.  On master they fail
+# this test with mass errors of order 1e-6 to 1e-7 because the linear-
+# combination update destroys the conservation property the stage equations
+# build for nonlinear theta(h).
+@pytest.mark.parametrize("scheme",
+                         [BackwardEuler(), RadauIIA(2), BDF(1),
+                          AdamsMoulton(0), AdamsMoulton(1), AdamsMoulton(2),
+                          GaussLegendre(1), GaussLegendre(2), QinZhang()],
+                         ids=["BackwardEuler", "RadauIIA2", "BDF1",
+                              "AM0", "AM1", "AM2",
+                              "ImplicitMidpoint", "GaussLegendre2", "QinZhang"])
 def test_mass_conservation_stage_value(scheme):
     """Test mass conservation with Dt(theta(h))"""
     err = run_richards(scheme, stage_type="value")

From fb28b88f27da8302dfb12c1a0c130a3c9e011fee Mon Sep 17 00:00:00 2001
From: Sia Ghelichkhan <ghelichkhani.siavash@gmail.com>
Date: Wed, 13 May 2026 20:43:59 +1000
Subject: [PATCH 05/11] Address review: rename, simplify update head, tighten
 comments

Per Pablo's review:

* Rename has_composite_time_derivative -> has_nonlinear_time_derivative.
  "Composite" was internal jargon; "nonlinear" is what we are actually
  detecting and reads more directly at the call sites.  Test file
  renamed in lockstep.

* Simplify the conservative update head from
  replace(F_dtless, {u0: unew}) - replace(F_dtless, {u0: u0})
  to replace(F_dtless, {u0: unew}) - F_dtless: the second replacement
  is an identity transformation, so the two forms are byte-identical.
  Pablo's suggested phrasing.

* Move the explanation of the two-evaluation conservative head from
  inline comments into the docstring of get_update_solver, and state
  the math in user-facing terms (inner(g(unew) - g(u0), v) * dx)
  rather than via the internal F_dtless symbol.  Drop the redundant
  "subtracting the two replaced forms keeps v unchanged" comment.

* Compress the warm-start comment in _update_general to a single line
  and drop the application-specific (Haverkamp / moisture capacity)
  language.  The failure mode is generic: g prime of u0 can vanish,
  in which case a zero-initialised iterate gives a singular initial
  Jacobian.

* Move the update_solver_parameters = solver_parameters default into
  the conservative-update branch that actually consumes it.  Pablo
  pointed out the previous placement implied the inheritance always
  applies; in fact the linear _update_Ainv path does not use it, so
  scoping the default makes the intent clearer.
---
 irksome/stage_value.py                        | 70 +++++++------------
 irksome/ufl/manipulation.py                   | 15 ++--
 ... => test_has_nonlinear_time_derivative.py} | 14 ++--
 3 files changed, 41 insertions(+), 58 deletions(-)
 rename tests/{test_has_composite_time_derivative.py => test_has_nonlinear_time_derivative.py} (85%)

diff --git a/irksome/stage_value.py b/irksome/stage_value.py
index c608b217..4b7b2ced 100644
--- a/irksome/stage_value.py
+++ b/irksome/stage_value.py
@@ -11,7 +11,7 @@
 from .bcs import stage2spaces4bc
 from .tableaux.ButcherTableaux import CollocationButcherTableau
 from .ufl.deriv import expand_time_derivatives
-from .ufl.manipulation import (has_composite_time_derivative,
+from .ufl.manipulation import (has_nonlinear_time_derivative,
                                split_time_derivative_terms,
                                remove_time_derivatives)
 from .tools import AI, dot, reshape, replace
@@ -186,14 +186,6 @@ def __init__(self, F, butcher_tableau, t, dt, u0, bcs=None,
                          sample_points=sample_points,
                          **kwargs)
 
-        # Conservative variational update inherits the stage solve's
-        # parameters by default: same form structure, same Jacobian
-        # sparsity, so the same preconditioner is the right starting
-        # point.  Callers who want a different setup can pass
-        # update_solver_parameters explicitly.
-        if update_solver_parameters is None:
-            update_solver_parameters = solver_parameters
-
         self.set_initial_guess()
 
         if use_collocation_update:
@@ -203,26 +195,19 @@ def __init__(self, F, butcher_tableau, t, dt, u0, bcs=None,
             self._update = self._update_collocation
 
         elif (not butcher_tableau.is_stiffly_accurate) and (vandermonde is None):
-            # For composite Dt(g(u)) we need the conservative variational
+            # For nonlinear Dt(g(u)) we need the conservative variational
             # update; the bAinv linear combination of stages is correct
             # for g = identity but breaks for nonlinear g.  For the
             # linear case we keep the bAinv shortcut: it is conservative
             # by construction (g = id makes both formulations agree)
             # AND it handles DAEs correctly, which the conservative
-            # variational update does not (it leaves the algebraic
-            # components of u_new unconstrained, producing a singular
-            # update Jacobian).
-            if has_composite_time_derivative(F, u0):
-                # Note: composite Dt(g(u)) on a DAE form (where some
-                # component of u0 has no time evolution) is not
-                # supported by this path -- the conservative variational
-                # update leaves the algebraic block unconstrained, and
-                # SNES will fail with a singular linear solve.  We do
-                # not detect this up-front because the available
-                # syntactic checks (is_ode etc.) are confused by the
-                # chain-rule promotion of Dt(u[i]) -> Dt(u)[i].  Stiffly
-                # accurate methods (RadauIIA, BackwardEuler) avoid this
-                # since u_new = U_s and no update solve is needed.
+            # variational update does not.
+            if has_nonlinear_time_derivative(F, u0):
+                # The update solve uses the same form residual as the
+                # stage solve, posed on V rather than V^s.  Inherit
+                # solver parameters; callers can override.
+                if update_solver_parameters is None:
+                    update_solver_parameters = solver_parameters
                 self.unew, self.update_solver = self.get_update_solver(update_solver_parameters)
                 self._update = self._update_general
             else:
@@ -233,6 +218,8 @@ def __init__(self, F, butcher_tableau, t, dt, u0, bcs=None,
                     self.update_scale = 1-numpy.sum(self.bAinv)
                     self._update = self._update_Ainv
                 except numpy.linalg.LinAlgError:
+                    if update_solver_parameters is None:
+                        update_solver_parameters = solver_parameters
                     self.unew, self.update_solver = self.get_update_solver(update_solver_parameters)
                     self._update = self._update_general
         else:
@@ -252,16 +239,19 @@ def _update_stiff_acc(self):
             u0bit.assign(self.stages.subfunctions[self.num_fields*(self.num_stages-1)+i])
 
     def get_update_solver(self, update_solver_parameters):
-        # Build a conservative variational update for u_new from the
-        # stage values.  The head is the same two-evaluation form used
-        # in the stage equations, evaluated at unew and u0:
-        #
-        #     replace(F_dtless, {u0: unew}) - replace(F_dtless, {u0: u0})
-        #
-        # which expands to inner(g(unew) - g(u0), v)*dx for the typical
-        # mass term inner(Dt(g(u)), v)*dx.  For g = identity this is
-        # exactly inner(unew - u0, v)*dx -- the previous head -- so the
-        # discrete update equation is unchanged in the linear case.
+        """Build a conservative variational update solve for u_new.
+
+        For a mass term ``inner(Dt(g(u)), v) * dx`` the update head is
+
+            inner(g(u_new) - g(u_0), v) * dx
+
+        evaluated at the stage-solve test function ``v``.  For
+        ``g = identity`` it reduces to ``inner(u_new - u_0, v) * dx``,
+        so the discrete update equation is unchanged in the linear
+        case.  The remaining (non-time-derivative) part of the form is
+        contributed by the standard RK quadrature
+        ``sum_i b_i * F_remainder(stage_i)``.
+        """
         F = self.F
         t = self.t
         dt = self.dt
@@ -272,10 +262,7 @@ def get_update_solver(self, update_solver_parameters):
         F_dtless = remove_time_derivatives(split_form.time)
         F_remainder = expand_time_derivatives(split_form.remainder, t=t, timedep_coeffs=())
 
-        # Two-evaluation conservative head.  Subtracting the two
-        # replaced forms keeps the test function v unchanged in both,
-        # so the result is a single Form valid for assembly.
-        Fupdate = replace(F_dtless, {u0: unew}) - replace(F_dtless, {u0: u0})
+        Fupdate = replace(F_dtless, {u0: unew}) - F_dtless
 
         C = vecconst(self.butcher_tableau.c)
         B = vecconst(self.butcher_tableau.b)
@@ -303,12 +290,7 @@ def get_update_solver(self, update_solver_parameters):
         return unew, update_solver
 
     def _update_general(self):
-        # Seed unew with u0 before solving.  The conservative head's
-        # Jacobian is the moisture-capacity-weighted mass matrix
-        # C(unew)*v*phi*dx, and for soils like Haverkamp the capacity
-        # vanishes at h = 0 -- so a fresh Function (zero-initialised)
-        # gives a singular initial Jacobian and SNES fails before
-        # taking any Newton step.  u0 is the natural warm start anyway.
+        # Constant-in-time initial guess to prevent singular Jacobian
         self.unew.assign(self.u0)
         self.update_solver.solve()
         self.u0.assign(self.unew)
diff --git a/irksome/ufl/manipulation.py b/irksome/ufl/manipulation.py
index 5c4d7d08..1d23dea2 100644
--- a/irksome/ufl/manipulation.py
+++ b/irksome/ufl/manipulation.py
@@ -25,7 +25,7 @@
 from .deriv import TimeDerivative
 
 __all__ = ("SplitTimeForm", "check_integrals", "split_time_derivative_terms",
-           "remove_time_derivatives", "has_composite_time_derivative")
+           "remove_time_derivatives", "has_nonlinear_time_derivative")
 
 
 # UFL classes that propagate the chain rule linearly through their
@@ -51,7 +51,7 @@ def _depends_on(expr, u0):
 
 def _is_pass_through(f, u0):
     """True iff ``f`` is u0 wrapped in a tree of UFL operations that are
-    linear in u0.  Used by :func:`has_composite_time_derivative` to
+    linear in u0.  Used by :func:`has_nonlinear_time_derivative` to
     decide whether ``Dt(f)`` is safe to chain-rule (``True``) or
     represents ``Dt(g(u0))`` for some nonlinear g (``False``).
 
@@ -87,11 +87,12 @@ def _is_pass_through(f, u0):
     return False
 
 
-def has_composite_time_derivative(F, u0):
-    """True iff ``F`` contains a TimeDerivative of a non-trivial expression
-    in u0 -- i.e. ``Dt(g(u0))`` for some nonlinear g.  These cases lose
-    mass conservation when chain-ruled through the stage-derivative form,
-    and require the conservative two-evaluation discretisation.
+def has_nonlinear_time_derivative(F, u0):
+    """True iff ``F`` contains a TimeDerivative of an expression that is
+    nonlinear in u0 -- i.e. ``Dt(g(u0))`` for some nonlinear g.  These
+    cases lose mass conservation when chain-ruled through the
+    stage-derivative form, and require the conservative two-evaluation
+    discretisation.
 
     Cases that are NOT flagged:
 
diff --git a/tests/test_has_composite_time_derivative.py b/tests/test_has_nonlinear_time_derivative.py
similarity index 85%
rename from tests/test_has_composite_time_derivative.py
rename to tests/test_has_nonlinear_time_derivative.py
index 30b11da0..5d742585 100644
--- a/tests/test_has_composite_time_derivative.py
+++ b/tests/test_has_nonlinear_time_derivative.py
@@ -1,4 +1,4 @@
-"""Classification contract for ``has_composite_time_derivative``.
+"""Classification contract for ``has_nonlinear_time_derivative``.
 
 The helper is what routes stage_value between the existing
 linear-combination update (``_update_Ainv``) and the new conservative
@@ -23,7 +23,7 @@
 from ufl import sin
 
 from irksome import Dt
-from irksome.ufl.manipulation import has_composite_time_derivative
+from irksome.ufl.manipulation import has_nonlinear_time_derivative
 
 
 def _theta(h, theta_r=Constant(0.15), theta_s=Constant(0.45),
@@ -39,7 +39,7 @@ def setup():
     return V, Function(V), TestFunction(V), Constant(0.0)
 
 
-def test_linear_arithmetic_is_not_composite(setup):
+def test_linear_arithmetic_is_not_flagged(setup):
     """Constant-coefficient scalings of u and additive time forcings must
     not be flagged.  These are linear in u; ``_update_Ainv`` is correct
     for them and is also the path that handles DAE structure."""
@@ -52,13 +52,13 @@ def test_linear_arithmetic_is_not_composite(setup):
     }
     for name, expr in forms.items():
         F = inner(expr, v) * dx
-        assert not has_composite_time_derivative(F, u), (
-            f"{name} was incorrectly flagged as composite -- the linear "
+        assert not has_nonlinear_time_derivative(F, u), (
+            f"{name} was incorrectly flagged as nonlinear -- the linear "
             "stage_value path would be skipped, breaking DAEs."
         )
 
 
-def test_nonlinear_is_composite(setup):
+def test_nonlinear_is_flagged(setup):
     """Genuinely nonlinear g(u) inside Dt must be flagged so that
     stage_value routes through the conservative variational update."""
     V, u, v, t = setup
@@ -69,7 +69,7 @@ def test_nonlinear_is_composite(setup):
     }
     for name, expr in nonlinear.items():
         F = inner(expr, v) * dx
-        assert has_composite_time_derivative(F, u), (
+        assert has_nonlinear_time_derivative(F, u), (
             f"{name} was missed -- this would silently produce a "
             "non-conservative discretisation for non-SA stage_value."
         )

From af058db57d707d4181e7247d9cc0669833216028 Mon Sep 17 00:00:00 2001
From: Sia Ghelichkhan <ghelichkhani.siavash@gmail.com>
Date: Wed, 13 May 2026 21:09:42 +1000
Subject: [PATCH 06/11] Lint fixes: drop unused firedrake imports, remove dict
 alignment spaces

---
 irksome/stage_value.py                      |  3 +--
 tests/test_has_nonlinear_time_derivative.py | 12 ++++++------
 2 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/irksome/stage_value.py b/irksome/stage_value.py
index 4b7b2ced..0d2ea210 100644
--- a/irksome/stage_value.py
+++ b/irksome/stage_value.py
@@ -3,8 +3,7 @@
 from FIAT import Bernstein, ufc_simplex
 from FIAT.barycentric_interpolation import LagrangePolynomialSet
 from firedrake import (Function, NonlinearVariationalProblem,
-                       NonlinearVariationalSolver, TestFunction, dx,
-                       inner)
+                       NonlinearVariationalSolver, TestFunction)
 from ufl import as_tensor, Form
 from ufl.constantvalue import as_ufl
 
diff --git a/tests/test_has_nonlinear_time_derivative.py b/tests/test_has_nonlinear_time_derivative.py
index 5d742585..b36e994d 100644
--- a/tests/test_has_nonlinear_time_derivative.py
+++ b/tests/test_has_nonlinear_time_derivative.py
@@ -45,10 +45,10 @@ def test_linear_arithmetic_is_not_flagged(setup):
     for them and is also the path that handles DAE structure."""
     V, u, v, t = setup
     forms = {
-        "Dt(2*u)":         Dt(Constant(2.0) * u),
-        "Dt(u/2)":         Dt(u / Constant(2.0)),
-        "Dt(u + sin(t))":  Dt(u + sin(t)),
-        "Dt(2*u + 3)":     Dt(Constant(2.0) * u + Constant(3.0)),
+        "Dt(2*u)": Dt(Constant(2.0) * u),
+        "Dt(u/2)": Dt(u / Constant(2.0)),
+        "Dt(u + sin(t))": Dt(u + sin(t)),
+        "Dt(2*u + 3)": Dt(Constant(2.0) * u + Constant(3.0)),
     }
     for name, expr in forms.items():
         F = inner(expr, v) * dx
@@ -63,9 +63,9 @@ def test_nonlinear_is_flagged(setup):
     stage_value routes through the conservative variational update."""
     V, u, v, t = setup
     nonlinear = {
-        "Dt(u*u)":      Dt(u * u),
+        "Dt(u*u)": Dt(u * u),
         "Dt(theta(u))": Dt(_theta(u)),
-        "Dt(1/u)":      Dt(Constant(1.0) / u),
+        "Dt(1/u)": Dt(Constant(1.0) / u),
     }
     for name, expr in nonlinear.items():
         F = inner(expr, v) * dx

From 68de83781e6302760fb57e3bbc0897be453831bd Mon Sep 17 00:00:00 2001
From: Sia Ghelichkhan <ghelichkhani.siavash@gmail.com>
Date: Thu, 14 May 2026 13:43:23 +1000
Subject: [PATCH 07/11] Replace nonlinear-Dt classifier with ufl.derivative
 probe

Per Pablo's review on manipulation.py: the bespoke walker with its
_PASS_THROUGH_OPS exemption list and _is_pass_through / _depends_on
helpers is a maintenance liability -- every linear UFL operator added
upstream silently misclassifies until Irksome's exemption list is
updated to match.

ufl.derivative already knows the linearity of Grad, Div, Indexed,
ListTensor, ComponentTensor, restrictions, CellAvg, FacetAvg, and any
future linear node UFL adds.  Reformulating the classifier as

    D = expand_derivatives(derivative(f, u0, TrialFunction(V)))
    if u0 in extract_coefficients(D): nonlinear

inherits that classification for free and collapses the walker to its
extract_type(F, TimeDerivative) loop plus three lines.

Verified against tests/test_has_nonlinear_time_derivative.py (all
seven cases agree with the walker) and tests/test_mass_conservation.py
(all nine tableaux still conserve to machine precision).
---
 irksome/ufl/manipulation.py | 98 ++++++++-----------------------------
 1 file changed, 20 insertions(+), 78 deletions(-)

diff --git a/irksome/ufl/manipulation.py b/irksome/ufl/manipulation.py
index 1d23dea2..78c154c0 100644
--- a/irksome/ufl/manipulation.py
+++ b/irksome/ufl/manipulation.py
@@ -11,15 +11,17 @@
 from operator import or_
 from typing import NamedTuple, Sequence, FrozenSet
 
-from ufl.algorithms.analysis import extract_type
+from ufl import TrialFunction, derivative
+from ufl.algorithms import expand_derivatives
+from ufl.algorithms.analysis import extract_coefficients, extract_type
 from ufl.corealg.traversal import traverse_unique_terminals
 from ufl.corealg.dag_traverser import DAGTraverser
 from ufl.classes import (
     BaseForm, CellAvg, Coefficient, ComponentTensor,
-    Conj, Cross, Curl, Derivative, Div, Division, Dot, Expr, FacetAvg,
+    Conj, Cross, Derivative, Div, Division, Dot, Expr, FacetAvg,
     Form, FormSum, Grad, Indexed, IndexSum, Inner, Integral,
     ListTensor, MultiIndex, NegativeRestricted, Outer, PositiveRestricted,
-    Product, ReferenceGrad, ReferenceValue, Sum, Variable,
+    Product, Sum, Variable,
 )
 
 from .deriv import TimeDerivative
@@ -28,65 +30,6 @@
            "remove_time_derivatives", "has_nonlinear_time_derivative")
 
 
-# UFL classes that propagate the chain rule linearly through their
-# operands.  Mirrors the ``terminal_modifier`` rule list in
-# ``TimeDerivativeRuleset`` plus the structural compositors that build
-# vector- and mixed-function views (ListTensor, IndexSum, ComponentTensor)
-# and facet/restriction operators that appear in DG forms (Restricted,
-# CellAvg, FacetAvg).  A TimeDerivative whose operand is built only from
-# these classes wrapping the prognostic variable u0 is safe to chain-rule:
-# the result is ``Dt(u0)`` (or an Indexed view of it) and the standard
-# substitution in the steppers handles it correctly.
-_PASS_THROUGH_OPS = (
-    CellAvg, ComponentTensor, Conj, Curl, Div, FacetAvg, Grad, Indexed,
-    IndexSum, ListTensor, NegativeRestricted, PositiveRestricted,
-    ReferenceGrad, ReferenceValue, Variable,
-)
-
-
-def _depends_on(expr, u0):
-    """True iff ``u0`` appears as a terminal in ``expr``."""
-    return u0 in traverse_unique_terminals(expr)
-
-
-def _is_pass_through(f, u0):
-    """True iff ``f`` is u0 wrapped in a tree of UFL operations that are
-    linear in u0.  Used by :func:`has_nonlinear_time_derivative` to
-    decide whether ``Dt(f)`` is safe to chain-rule (``True``) or
-    represents ``Dt(g(u0))`` for some nonlinear g (``False``).
-
-    Linear operations come in two flavours.  Structural / unary linear
-    ops (in ``_PASS_THROUGH_OPS``) propagate linearity through their
-    only operand.  Algebraic binary ops (``Sum``, ``Product``,
-    ``Division``) are linear under restricted conditions: a sum is
-    linear iff each summand is either u0-independent or itself linear
-    in u0; a product is linear iff at most one factor depends on u0
-    and that factor is itself linear; a quotient is linear iff its
-    denominator does not depend on u0 and the numerator is linear.
-    """
-    if f is u0:
-        return True
-    if isinstance(f, MultiIndex):
-        return True
-    if isinstance(f, _PASS_THROUGH_OPS):
-        return all(_is_pass_through(c, u0) for c in f.ufl_operands)
-    if isinstance(f, Sum):
-        return all((not _depends_on(c, u0)) or _is_pass_through(c, u0)
-                   for c in f.ufl_operands)
-    if isinstance(f, Product):
-        deps = [_depends_on(c, u0) for c in f.ufl_operands]
-        if sum(deps) > 1:
-            return False
-        return all((not dep) or _is_pass_through(c, u0)
-                   for c, dep in zip(f.ufl_operands, deps))
-    if isinstance(f, Division):
-        num, den = f.ufl_operands
-        if _depends_on(den, u0):
-            return False
-        return (not _depends_on(num, u0)) or _is_pass_through(num, u0)
-    return False
-
-
 def has_nonlinear_time_derivative(F, u0):
     """True iff ``F`` contains a TimeDerivative of an expression that is
     nonlinear in u0 -- i.e. ``Dt(g(u0))`` for some nonlinear g.  These
@@ -94,20 +37,18 @@ def has_nonlinear_time_derivative(F, u0):
     stage-derivative form, and require the conservative two-evaluation
     discretisation.
 
-    Cases that are NOT flagged:
-
-    * ``Dt(u0)``                     -- the prognostic variable itself
-    * ``Dt(u0[i])`` / ``Dt(split(u0))`` -- structural views for mixed/vector spaces
-    * ``Dt(grad(u0))`` and other linear differential operators
-    * ``Dt(u0('+'))`` and other facet restrictions
-    * ``Dt(c*u0)``, ``Dt(u0/c)``, ``Dt(u0 + h(t))`` -- u0 multiplied or
-      offset by anything that does not itself depend on u0
-    * ``Dt(f(t, x))``                -- operand does not involve u0 at all
+    For each ``Dt(f)`` in the form, the Gateaux derivative of ``f`` with
+    respect to ``u0`` is taken in a trial direction.  If the derivative
+    still depends on ``u0``, ``f`` is nonlinear in u0.  This delegates
+    the classification of linear operators (Grad, Div, Indexed,
+    restrictions, ListTensor, ComponentTensor, ...) to UFL's own
+    derivative machinery rather than maintaining a parallel exemption
+    list inside Irksome.
 
     .. warning::
 
-       The detection is purely *syntactic*: it checks whether ``u0``
-       appears as a UFL terminal under ``Dt``.  If a user creates an
+       The detection is syntactic: it checks whether ``u0`` appears
+       under ``Dt`` after differentiation.  If a user creates an
        intermediate :class:`~firedrake.Function` whose values were
        interpolated from an expression in u0 and then writes
        ``Dt(that_intermediate)``, the syntactic dependence on u0 is
@@ -116,14 +57,15 @@ def has_nonlinear_time_derivative(F, u0):
        wrap the symbolic expression directly in ``Dt`` (as
        ``Dt(theta(u))``, not ``Dt(theta_function)``).
     """
+    Trial = TrialFunction(u0.function_space())
     for td in extract_type(F, TimeDerivative):
         f, = td.ufl_operands
-        if not _depends_on(f, u0):
-            # Dt(f(t,x)) -- chain-ruled analytically, no u0 dependence
-            continue
-        if _is_pass_through(f, u0):
+        if u0 not in extract_coefficients(f):
+            # Dt(f(t,x)) -- no u0 dependence, chain-ruled analytically
             continue
-        return True
+        D = expand_derivatives(derivative(f, u0, Trial))
+        if u0 in extract_coefficients(D):
+            return True
     return False
 
 

From 539b8aa09e1105c8a728e2b4f44487761ae45b1f Mon Sep 17 00:00:00 2001
From: Sia Ghelichkhan <ghelichkhani.siavash@gmail.com>
Date: Thu, 14 May 2026 13:43:36 +1000
Subject: [PATCH 08/11] Update solver: hard-coded SPD default, no inheritance
 from stages

Per Pablo's review on stage_value.py: silently inheriting
solver_parameters into the conservative update solve is wrong-answer
unsafe, not just suboptimal.  Stage-tuned options can fail badly when
applied to the update problem -- fieldsplit indices keyed to V^s do
not match V, snes_type='ksponly' returns a wrong answer when the
update head is nonlinear, lagged Jacobians and custom MG transfers
are tuned to operators that have nothing to do with the update.

Drop the inheritance.  When update_solver_parameters is None, use a
fixed default tuned to the update problem itself.  The Jacobian is

    g'(u_new) * v * phi * dx

(the RK quadrature contributes spatial terms at known stage values,
constant in u_new, so they drop from the linearisation) -- a weighted
mass matrix, SPD when g' > 0, with condition number bounded
independent of mesh size.  For that operator the right default is
Newton + CG + bjacobi/icc: scales to any mesh size, single-digit
iteration counts in the canonical case, no failure modes that LU
would handle better.

Non-monotone g breaks SPD and needs an explicit override (GMRES +
a more general preconditioner).  Points where g' vanishes (plateau
soils with theta' = 0 in the saturated zone) trip a zero pivot
regardless of preconditioner choice -- a separate structural issue.

Docstring on get_update_solver now states the no-inheritance
asymmetry explicitly so users do not assume copy-through behaviour.
---
 irksome/stage_value.py | 44 +++++++++++++++++++++++++++++++++++++-----
 1 file changed, 39 insertions(+), 5 deletions(-)

diff --git a/irksome/stage_value.py b/irksome/stage_value.py
index 0d2ea210..25e67972 100644
--- a/irksome/stage_value.py
+++ b/irksome/stage_value.py
@@ -18,6 +18,33 @@
 from .base_time_stepper import StageCoupledTimeStepper
 
 
+# Default solver for the conservative update.  The update problem is a
+# nonlinear mass-matrix-like solve on V: the Jacobian is
+# g'(u_new) * v * phi * dx (the spatial terms in the RK quadrature are
+# evaluated at known stage values and are constant in u_new).  That
+# operator is SPD when g'(u) > 0 -- the well-posed case for Dt(g(u))
+# with g monotone -- and its condition number is bounded independent
+# of mesh size, so CG converges in O(1) iterations with a cheap
+# preconditioner.  bjacobi + icc respects the block structure of
+# vector / mixed V and degenerates gracefully on scalar V.
+#
+# Assumptions: g is monotone in u (g'(u) > 0 a.e.).  Points where g'
+# vanishes trip a zero pivot regardless of preconditioner choice.
+# Non-monotone g breaks SPD and needs an explicit override
+# (e.g. GMRES + a more general preconditioner).
+#
+# Does not inherit from solver_parameters: the stage problem lives on
+# V^s = V x ... x V and stage-tuned options -- fieldsplit indices,
+# snes_type='ksponly', lagged Jacobians, custom MG transfers -- do not
+# generally apply to the update solve on V.
+_DEFAULT_UPDATE_SOLVER_PARAMETERS = {
+    'snes_type': 'newtonls',
+    'ksp_type': 'cg',
+    'pc_type': 'bjacobi',
+    'sub_pc_type': 'icc',
+}
+
+
 def to_value(u0, stages, vandermonde):
     """convert from Bernstein to Lagrange representation
 
@@ -202,11 +229,8 @@ def __init__(self, F, butcher_tableau, t, dt, u0, bcs=None,
             # AND it handles DAEs correctly, which the conservative
             # variational update does not.
             if has_nonlinear_time_derivative(F, u0):
-                # The update solve uses the same form residual as the
-                # stage solve, posed on V rather than V^s.  Inherit
-                # solver parameters; callers can override.
                 if update_solver_parameters is None:
-                    update_solver_parameters = solver_parameters
+                    update_solver_parameters = _DEFAULT_UPDATE_SOLVER_PARAMETERS
                 self.unew, self.update_solver = self.get_update_solver(update_solver_parameters)
                 self._update = self._update_general
             else:
@@ -218,7 +242,7 @@ def __init__(self, F, butcher_tableau, t, dt, u0, bcs=None,
                     self._update = self._update_Ainv
                 except numpy.linalg.LinAlgError:
                     if update_solver_parameters is None:
-                        update_solver_parameters = solver_parameters
+                        update_solver_parameters = _DEFAULT_UPDATE_SOLVER_PARAMETERS
                     self.unew, self.update_solver = self.get_update_solver(update_solver_parameters)
                     self._update = self._update_general
         else:
@@ -250,6 +274,16 @@ def get_update_solver(self, update_solver_parameters):
         case.  The remaining (non-time-derivative) part of the form is
         contributed by the standard RK quadrature
         ``sum_i b_i * F_remainder(stage_i)``.
+
+        ``update_solver_parameters`` does not inherit from
+        ``solver_parameters``.  The update solve is a different
+        problem from the stage solve -- it is posed on ``V`` rather
+        than ``V^s = V x ... x V``, and its Jacobian is a (nonlinear)
+        weighted mass matrix rather than the stage operator.  Stage-
+        tuned options such as fieldsplit indices, ``snes_type='ksponly'``,
+        lagged Jacobians, or custom multigrid transfers generally do
+        not apply.  If ``update_solver_parameters`` is left as None the
+        default ``_DEFAULT_UPDATE_SOLVER_PARAMETERS`` is used.
         """
         F = self.F
         t = self.t

From ecea8fe9c9f07230d2422b3582898644be020bb4 Mon Sep 17 00:00:00 2001
From: Sia Ghelichkhan <ghelichkhani.siavash@gmail.com>
Date: Thu, 14 May 2026 22:49:05 +1000
Subject: [PATCH 09/11] Apply suggestions from code review

Co-authored-by: Pablo Brubeck <brubeck@protonmail.com>
---
 irksome/ufl/manipulation.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/irksome/ufl/manipulation.py b/irksome/ufl/manipulation.py
index 78c154c0..3adbcd69 100644
--- a/irksome/ufl/manipulation.py
+++ b/irksome/ufl/manipulation.py
@@ -11,7 +11,7 @@
 from operator import or_
 from typing import NamedTuple, Sequence, FrozenSet
 
-from ufl import TrialFunction, derivative
+from ufl import derivative
 from ufl.algorithms import expand_derivatives
 from ufl.algorithms.analysis import extract_coefficients, extract_type
 from ufl.corealg.traversal import traverse_unique_terminals
@@ -57,13 +57,12 @@ def has_nonlinear_time_derivative(F, u0):
        wrap the symbolic expression directly in ``Dt`` (as
        ``Dt(theta(u))``, not ``Dt(theta_function)``).
     """
-    Trial = TrialFunction(u0.function_space())
     for td in extract_type(F, TimeDerivative):
         f, = td.ufl_operands
         if u0 not in extract_coefficients(f):
             # Dt(f(t,x)) -- no u0 dependence, chain-ruled analytically
             continue
-        D = expand_derivatives(derivative(f, u0, Trial))
+        D = expand_derivatives(derivative(f, u0))
         if u0 in extract_coefficients(D):
             return True
     return False

From 05e0d8eed05197921f42f2a843bb31d7e45507b8 Mon Sep 17 00:00:00 2001
From: Sia Ghelichkhan <ghelichkhani.siavash@gmail.com>
Date: Thu, 14 May 2026 23:00:39 +1000
Subject: [PATCH 10/11] Drop hard-coded update solver default, defer to
 Firedrake's

Per Pablo's follow-up on the update solver default: Irksome does not
override Firedrake's default solver_parameters anywhere else, and
should not introduce a one-off here.  When update_solver_parameters
is None, NonlinearVariationalSolver picks up Firedrake's default
(typically a sparse direct solve).  Users who want CG + bjacobi/icc
or anything else configure it explicitly.

tests/test_mass_conservation.py was previously inheriting tight
snes_rtol/snes_atol from the stage solver into the update solve;
with the inheritance dropped, the conservation assertion (10 * eps)
fails by a factor of ~1 because the default SNES rtol of 1e-8 is the
binding constraint on the update solve precision rather than machine
arithmetic.  Pass update_solver_parameters explicitly for stage_value
runs so the assertion measures conservation rather than SNES
tolerance.  The DG path does not accept update_solver_parameters
(it has no update solve in the same sense) so the kwarg is gated on
stage_type.
---
 irksome/stage_value.py          | 36 +++------------------------------
 tests/test_mass_conservation.py | 12 +++++++----
 2 files changed, 11 insertions(+), 37 deletions(-)

diff --git a/irksome/stage_value.py b/irksome/stage_value.py
index 25e67972..e68969fb 100644
--- a/irksome/stage_value.py
+++ b/irksome/stage_value.py
@@ -18,33 +18,6 @@
 from .base_time_stepper import StageCoupledTimeStepper
 
 
-# Default solver for the conservative update.  The update problem is a
-# nonlinear mass-matrix-like solve on V: the Jacobian is
-# g'(u_new) * v * phi * dx (the spatial terms in the RK quadrature are
-# evaluated at known stage values and are constant in u_new).  That
-# operator is SPD when g'(u) > 0 -- the well-posed case for Dt(g(u))
-# with g monotone -- and its condition number is bounded independent
-# of mesh size, so CG converges in O(1) iterations with a cheap
-# preconditioner.  bjacobi + icc respects the block structure of
-# vector / mixed V and degenerates gracefully on scalar V.
-#
-# Assumptions: g is monotone in u (g'(u) > 0 a.e.).  Points where g'
-# vanishes trip a zero pivot regardless of preconditioner choice.
-# Non-monotone g breaks SPD and needs an explicit override
-# (e.g. GMRES + a more general preconditioner).
-#
-# Does not inherit from solver_parameters: the stage problem lives on
-# V^s = V x ... x V and stage-tuned options -- fieldsplit indices,
-# snes_type='ksponly', lagged Jacobians, custom MG transfers -- do not
-# generally apply to the update solve on V.
-_DEFAULT_UPDATE_SOLVER_PARAMETERS = {
-    'snes_type': 'newtonls',
-    'ksp_type': 'cg',
-    'pc_type': 'bjacobi',
-    'sub_pc_type': 'icc',
-}
-
-
 def to_value(u0, stages, vandermonde):
     """convert from Bernstein to Lagrange representation
 
@@ -229,8 +202,6 @@ def __init__(self, F, butcher_tableau, t, dt, u0, bcs=None,
             # AND it handles DAEs correctly, which the conservative
             # variational update does not.
             if has_nonlinear_time_derivative(F, u0):
-                if update_solver_parameters is None:
-                    update_solver_parameters = _DEFAULT_UPDATE_SOLVER_PARAMETERS
                 self.unew, self.update_solver = self.get_update_solver(update_solver_parameters)
                 self._update = self._update_general
             else:
@@ -241,8 +212,6 @@ def __init__(self, F, butcher_tableau, t, dt, u0, bcs=None,
                     self.update_scale = 1-numpy.sum(self.bAinv)
                     self._update = self._update_Ainv
                 except numpy.linalg.LinAlgError:
-                    if update_solver_parameters is None:
-                        update_solver_parameters = _DEFAULT_UPDATE_SOLVER_PARAMETERS
                     self.unew, self.update_solver = self.get_update_solver(update_solver_parameters)
                     self._update = self._update_general
         else:
@@ -282,8 +251,9 @@ def get_update_solver(self, update_solver_parameters):
         weighted mass matrix rather than the stage operator.  Stage-
         tuned options such as fieldsplit indices, ``snes_type='ksponly'``,
         lagged Jacobians, or custom multigrid transfers generally do
-        not apply.  If ``update_solver_parameters`` is left as None the
-        default ``_DEFAULT_UPDATE_SOLVER_PARAMETERS`` is used.
+        not apply.  If ``update_solver_parameters`` is None, Firedrake's
+        default solver parameters are used (typically a sparse direct
+        solve).  Pass an explicit dict to override.
         """
         F = self.F
         t = self.t
diff --git a/tests/test_mass_conservation.py b/tests/test_mass_conservation.py
index 3ea24c26..35ec1b98 100644
--- a/tests/test_mass_conservation.py
+++ b/tests/test_mass_conservation.py
@@ -50,11 +50,15 @@ def run_richards(scheme, **kwargs):
         kwargs['startup_parameters'] = {'tableau': RadauIIA(2),
                                         'stepper_kwargs': {'stage_type': 'value'}}
 
+    snes_params = {"snes_rtol": 1e-10, "snes_atol": 1e-14}
+    if kwargs.get("stage_type") == "value":
+        # The update solve does not inherit solver options from the stage
+        # solve; pass tight SNES tolerances explicitly so the conservation
+        # assertion below sits at machine precision rather than at the
+        # default SNES rtol.
+        kwargs["update_solver_parameters"] = snes_params
     stepper = TimeStepper(F, scheme, t, dt, h,
-                          solver_parameters={
-                              "snes_rtol": 1e-10,
-                              "snes_atol": 1e-14,
-                          },
+                          solver_parameters=snes_params,
                           **kwargs)
 
     if isinstance(scheme, MultistepTableau):

From 4a3315ad75e3847ee1caf5c179d442a2a84717e7 Mon Sep 17 00:00:00 2001
From: Sia Ghelichkhan <ghelichkhani.siavash@gmail.com>
Date: Fri, 15 May 2026 12:21:03 +1000
Subject: [PATCH 11/11] Lint: drop now-unused Grad and Div imports

These were exemption-list classes for the bespoke walker that 68de837
replaced with the ufl.derivative probe.  Walker is gone, classes are
unused.
---
 irksome/ufl/manipulation.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/irksome/ufl/manipulation.py b/irksome/ufl/manipulation.py
index 3adbcd69..4122a818 100644
--- a/irksome/ufl/manipulation.py
+++ b/irksome/ufl/manipulation.py
@@ -18,8 +18,8 @@
 from ufl.corealg.dag_traverser import DAGTraverser
 from ufl.classes import (
     BaseForm, CellAvg, Coefficient, ComponentTensor,
-    Conj, Cross, Derivative, Div, Division, Dot, Expr, FacetAvg,
-    Form, FormSum, Grad, Indexed, IndexSum, Inner, Integral,
+    Conj, Cross, Derivative, Division, Dot, Expr, FacetAvg,
+    Form, FormSum, Indexed, IndexSum, Inner, Integral,
     ListTensor, MultiIndex, NegativeRestricted, Outer, PositiveRestricted,
     Product, Sum, Variable,
 )