NVIDIA · chesterxgchen · Jun 16, 2026 · Jun 16, 2026 · Jun 16, 2026 · Jun 16, 2026
diff --git a/.githooks/README.md b/.githooks/README.md
@@ -0,0 +1,20 @@
+# Git hooks
+
+Repo-managed git hooks. Enable them once per clone:
+
+```bash
+git config core.hooksPath .githooks
+```
+
+## `pre-push`
+
+Runs the deterministic agent-skill lint
+(`python -m dev_tools.agent.skills.checks --skills-root skills`) and blocks the
+push if it finds anything, so the agent skills checked into GitHub stay clean.
+It covers `skills/` and the eval suites under `dev_tools/agent/skill_evals/`.
+
+The same lint also runs in `./runtest.sh -s` and in the pre-merge CI unit tests
+(`tests/unit_test/tool/agent_skill_checks/seed_skills_test.py`), so this hook is
+a fast local pre-push gate rather than the only enforcement.
+
+Emergency bypass: `git push --no-verify`.
diff --git a/.githooks/pre-push b/.githooks/pre-push
@@ -0,0 +1,28 @@
+#!/usr/bin/env bash
+# NVFLARE pre-push hook: block a push when the agent skill lint finds anything,
+# so the skills checked into GitHub stay clean.
+#
+# Enable once per clone:
+#     git config core.hooksPath .githooks
+#
+# The lint is fast and dependency-light; it covers skills/ and the eval suites
+# under dev_tools/agent/skill_evals/. The same check runs in `./runtest.sh -s`
+# and in CI. Bypass in an emergency with `git push --no-verify`.
+#
+# Exits 0 when there is no skills root or the lint is clean, and 1 when the
+# lint reports findings or python3 is not available.
+set -euo pipefail
+
+repo_root="$(git rev-parse --show-toplevel)"
+
+# Nothing to check if this repo has no skills root.
+if [ ! -d "$repo_root/skills" ]; then
+    exit 0
+fi
+
+# `python3 -m dev_tools...` resolves the package relative to the current
+# directory, so pin it to the repo root in case the hook is run by hand from a
+# subdirectory.
+cd "$repo_root"
+
+# Report a missing interpreter as such instead of as a lint failure.
+if ! command -v python3 >/dev/null 2>&1; then
+    echo "pre-push: python3 not found on PATH; cannot run the agent skill lint." >&2
+    echo "          Emergency bypass: git push --no-verify" >&2
+    exit 1
+fi
+
+echo "pre-push: running agent skill lint (python3 -m dev_tools.agent.skills.checks)..."
+if ! python3 -m dev_tools.agent.skills.checks --skills-root "$repo_root/skills"; then
+    echo ""
+    echo "pre-push: agent skill lint failed. Fix the findings above (or run"
+    echo "          ./runtest.sh -s) before pushing. Emergency bypass:"
+    echo "          git push --no-verify"
+    exit 1
+fi
+echo "pre-push: agent skill lint clean."
diff --git a/.gitignore b/.gitignore
@@ -187,9 +187,18 @@ CLAUDE.local.md
 .cursor/
 .claude/
 .devcontainer/
-tmp/review/
+tmp/
 
 # memory profiler output
 tests/memory_profile/**/*.dat
 /HEAD
 uv.lock
+
+# Local-only agent skill design docs (human reference; not shipped in the PR).
+# Only docs/design/skills_architecture.md is tracked; keep the rest out so a
+# stray `git add -A` cannot re-track them.
+docs/design/agent_skill_authoring.md
+docs/design/agent_skill_checks_report.md
+docs/design/agent_skill_evaluation.md
+docs/design/agent_skill_operating_model.md
+docs/design/export_arg_fidelity.md
diff --git a/MANIFEST.in b/MANIFEST.in
@@ -3,6 +3,8 @@ include nvflare/_version.py
 include nvflare/libs/*.so
 include nvflare/fuel/utils/*.json
 recursive-include skills *
+global-exclude *.py[co]
+global-exclude __pycache__
 # Build-time only: AgentSkillsBuildPy loads this frontmatter validator to build the
 # bundled-skills manifest. Needed in the sdist so wheels can build from an sdist; it is
 # not installed into the wheel (dev_tools is excluded from packages in setup.py).

diff --git a/dev_tools/agent/skill_evals/nvflare-convert-lightning/evals.json b/dev_tools/agent/skill_evals/nvflare-convert-lightning/evals.json
diff --git a/dev_tools/agent/skill_evals/nvflare-convert-lightning/files/SOURCE.md b/dev_tools/agent/skill_evals/nvflare-convert-lightning/files/SOURCE.md
@@ -0,0 +1,36 @@
+# Fixture Source Notes
+
+The `hello-lightning` fixture is minimized, unconverted PyTorch Lightning
+training code modeled on the NVFLARE repository example:
+
+- Source example: `examples/hello-world/hello-lightning`
+
+The fixture intentionally omits real datasets, data download, FLARE integration,
+and full job execution details so trigger and behavior evals stay deterministic.
+`train.py` and `model.py` represent plain Lightning code before any FLARE
+conversion; the agent under evaluation is expected to add the
+`flare.patch(trainer)` Client API integration and a `job.py`.
+
+The `gpu-device-lightning` fixture is synthetic, derived from
+`hello-lightning` with an explicit `torch.cuda.is_available()` choice between
+Lightning's `gpu` and `cpu` accelerators. It makes device-intent preservation
+applicable without requiring a GPU on the evaluation host.
+
+The `vocab-lightning` fixture adds a `LitTextCNN` model whose `__init__` has a
+required, data-derived argument (`vocab_size`, no default). The conversion must
+pin one shared vocabulary size for the server recipe model config and every
+client model construction path. Passing a live `LightningModule` instance with
+required args can serialize without those args and fail server-side
+reconstruction in the model persistor.
+
+The `external-data-lightning` fixture is synthetic, derived from the
+`hello-lightning` fixture but loading train/val CSVs from an external data
+directory (`--data-dir`, default `/data/nvflare/lightning-tabular`) instead of
+building synthetic in-memory tensors. The path is intentionally external to the
+repository and run workspace so configurable data-path behavior is asserted only
+when the source provides an external dataset location.
+
+The `hello-lightning` fixture's `LitNet` includes `validation_step` with
+`self.log("val_loss", ...)` and the training entry point builds a validation
+dataloader, so evaluation-focused evals can assert Lightning-native evaluation
+(`trainer.validate` before `trainer.fit`) without a separate fixture.
diff --git a/dev_tools/agent/skill_evals/nvflare-convert-lightning/files/external-data-lightning/model.py b/dev_tools/agent/skill_evals/nvflare-convert-lightning/files/external-data-lightning/model.py
@@ -0,0 +1,48 @@
+# Copyright (c) 2026, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import pytorch_lightning as pl
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+
+class LitNet(pl.LightningModule):
+    """Two-layer fully connected classifier trained with cross-entropy loss.
+
+    Args:
+        input_size: Number of input features of the first linear layer.
+        num_classes: Number of outputs of the second linear layer.
+        lr: Learning rate handed to the SGD optimizer.
+    """
+
+    def __init__(self, input_size=4, num_classes=2, lr=0.01):
+        super().__init__()
+        # Keeps the constructor arguments available through self.hparams;
+        # configure_optimizers reads self.hparams.lr.
+        self.save_hyperparameters()
+        self.fc1 = nn.Linear(input_size, 8)
+        self.fc2 = nn.Linear(8, num_classes)
+
+    def forward(self, x):
+        """Apply fc1 followed by ReLU, then return the fc2 output."""
+        x = F.relu(self.fc1(x))
+        return self.fc2(x)
+
+    def training_step(self, batch, batch_idx):
+        """Compute, log, and return the cross-entropy loss for one training batch.
+
+        Raises:
+            ValueError: If the labels tensor of the batch has no elements.
+        """
+        features, labels = batch
+        if labels.numel() == 0:
+            raise ValueError("empty training batch; check per-site data partitioning")
+        loss = F.cross_entropy(self(features), labels)
+        self.log("train_loss", loss)
+        return loss
+
+    def validation_step(self, batch, batch_idx):
+        """Compute the cross-entropy loss for one validation batch and log it as "val_loss".
+
+        Raises:
+            ValueError: If the labels tensor of the batch has no elements.
+        """
+        features, labels = batch
+        if labels.numel() == 0:
+            raise ValueError("empty validation batch; check per-site data partitioning")
+        loss = F.cross_entropy(self(features), labels)
+        self.log("val_loss", loss)
+
+    def configure_optimizers(self):
+        """Return an SGD optimizer over all module parameters using the saved lr."""
+        return torch.optim.SGD(self.parameters(), lr=self.hparams.lr)
diff --git a/dev_tools/agent/skill_evals/nvflare-convert-lightning/files/external-data-lightning/train.py b/dev_tools/agent/skill_evals/nvflare-convert-lightning/files/external-data-lightning/train.py
@@ -0,0 +1,70 @@
+# Copyright (c) 2026, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import argparse
+import csv
+from pathlib import Path
+
+import pytorch_lightning as pl
+import torch
+from model import LitNet
+from torch.utils.data import DataLoader, TensorDataset
+
+# Default dataset directory; used by TabularDataModule and the --data-dir option.
+DEFAULT_DATA_DIR = "/data/nvflare/lightning-tabular"
+
+
+def load_csv(data_path):
+    """Load one CSV file into a TensorDataset of features and labels.
+
+    Each row must provide the columns feature_0 .. feature_3 (parsed as float)
+    and label (parsed as int); the header row supplies the column names.
+
+    Args:
+        data_path: Path of the CSV file to read (opened as UTF-8).
+
+    Returns:
+        A TensorDataset holding a float32 feature tensor and a long label tensor.
+
+    Raises:
+        ValueError: If the file contains no data rows.
+    """
+    features = []
+    labels = []
+    # newline="" is the open mode the csv module expects for its readers.
+    with Path(data_path).open(newline="", encoding="utf-8") as csv_file:
+        reader = csv.DictReader(csv_file)
+        for row in reader:
+            features.append([float(row[f"feature_{index}"]) for index in range(4)])
+            labels.append(int(row["label"]))
+    if not features:
+        raise ValueError(f"no rows loaded from {data_path}")
+    return TensorDataset(torch.tensor(features, dtype=torch.float32), torch.tensor(labels, dtype=torch.long))
+
+
+class TabularDataModule(pl.LightningDataModule):
+    """Data module serving train.csv and val.csv from a single directory.
+
+    Args:
+        data_dir: Directory that contains train.csv and val.csv.
+        batch_size: Batch size used by both dataloaders.
+    """
+
+    def __init__(self, data_dir=DEFAULT_DATA_DIR, batch_size=4):
+        super().__init__()
+        self.data_dir = Path(data_dir)
+        self.batch_size = batch_size
+
+    def setup(self, stage=None):
+        """Load both CSV splits; the stage argument is accepted but not consulted."""
+        self.train_dataset = load_csv(self.data_dir / "train.csv")
+        self.val_dataset = load_csv(self.data_dir / "val.csv")
+
+    def train_dataloader(self):
+        """Return a shuffled DataLoader over the training split loaded by setup()."""
+        return DataLoader(self.train_dataset, batch_size=self.batch_size, shuffle=True)
+
+    def val_dataloader(self):
+        """Return an unshuffled DataLoader over the validation split loaded by setup()."""
+        return DataLoader(self.val_dataset, batch_size=self.batch_size)
+
+
+def main():
+    """Parse --data-dir and --batch-size, then fit LitNet for one epoch on CPU."""
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--data-dir", default=DEFAULT_DATA_DIR)
+    parser.add_argument("--batch-size", type=int, default=4)
+    args = parser.parse_args()
+
+    model = LitNet()
+    datamodule = TabularDataModule(data_dir=args.data_dir, batch_size=args.batch_size)
+    trainer = pl.Trainer(max_epochs=1, accelerator="cpu", devices=1, logger=False)
+    trainer.fit(model, datamodule=datamodule)
+
+
+# Run training only when this file is executed as a script.
+if __name__ == "__main__":
+    main()
diff --git a/dev_tools/agent/skill_evals/nvflare-convert-lightning/files/gpu-device-lightning/model.py b/dev_tools/agent/skill_evals/nvflare-convert-lightning/files/gpu-device-lightning/model.py
@@ -0,0 +1,36 @@
+# Copyright (c) 2026, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import pytorch_lightning as pl
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+
+class LitNet(pl.LightningModule):
+    """Single linear layer (4 inputs, 2 outputs) trained with cross-entropy loss."""
+
+    def __init__(self):
+        super().__init__()
+        self.layer = nn.Linear(4, 2)
+
+    def forward(self, features):
+        """Return the output of the linear layer for the given features."""
+        return self.layer(features)
+
+    def training_step(self, batch, batch_idx):
+        """Return the cross-entropy loss for one training batch.
+
+        Raises:
+            ValueError: If the labels tensor of the batch has no elements.
+        """
+        features, labels = batch
+        if labels.numel() == 0:
+            raise ValueError("empty training batch; check per-site data partitioning")
+        return F.cross_entropy(self(features), labels)
+
+    def configure_optimizers(self):
+        """Return an SGD optimizer over all module parameters with lr=0.01."""
+        return torch.optim.SGD(self.parameters(), lr=0.01)
diff --git a/dev_tools/agent/skill_evals/nvflare-convert-lightning/files/gpu-device-lightning/train.py b/dev_tools/agent/skill_evals/nvflare-convert-lightning/files/gpu-device-lightning/train.py
@@ -0,0 +1,29 @@
+# Copyright (c) 2026, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import pytorch_lightning as pl
+import torch
+from model import LitNet
+from torch.utils.data import DataLoader, TensorDataset
+
+
+def main():
+    """Fit LitNet for one epoch on 8 random samples, on GPU when CUDA is available."""
+    # Select the "gpu" accelerator only when CUDA is usable; otherwise use "cpu".
+    accelerator = "gpu" if torch.cuda.is_available() else "cpu"
+    dataset = TensorDataset(torch.randn(8, 4), torch.randint(0, 2, (8,)))
+    trainer = pl.Trainer(max_epochs=1, accelerator=accelerator, devices=1, logger=False)
+    trainer.fit(LitNet(), DataLoader(dataset, batch_size=4))
+
+
+# Run training only when this file is executed as a script.
+if __name__ == "__main__":
+    main()
diff --git a/dev_tools/agent/skill_evals/nvflare-convert-lightning/files/hello-lightning/model.py b/dev_tools/agent/skill_evals/nvflare-convert-lightning/files/hello-lightning/model.py
@@ -0,0 +1,48 @@
+# Copyright (c) 2026, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import pytorch_lightning as pl
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+
+class LitNet(pl.LightningModule):
+    """Two-layer fully connected classifier trained with cross-entropy loss.
+
+    Args:
+        input_size: Number of input features of the first linear layer.
+        num_classes: Number of outputs of the second linear layer.
+        lr: Learning rate handed to the SGD optimizer.
+    """
+
+    def __init__(self, input_size=4, num_classes=2, lr=0.01):
+        super().__init__()
+        # Keeps the constructor arguments available through self.hparams;
+        # configure_optimizers reads self.hparams.lr.
+        self.save_hyperparameters()
+        self.fc1 = nn.Linear(input_size, 8)
+        self.fc2 = nn.Linear(8, num_classes)
+
+    def forward(self, x):
+        """Apply fc1 followed by ReLU, then return the fc2 output."""
+        x = F.relu(self.fc1(x))
+        return self.fc2(x)
+
+    def training_step(self, batch, batch_idx):
+        """Compute, log, and return the cross-entropy loss for one training batch.
+
+        Raises:
+            ValueError: If the labels tensor of the batch has no elements.
+        """
+        features, labels = batch
+        if labels.numel() == 0:
+            raise ValueError("empty training batch; check per-site data partitioning")
+        loss = F.cross_entropy(self(features), labels)
+        self.log("train_loss", loss)
+        return loss
+
+    def validation_step(self, batch, batch_idx):
+        """Compute the cross-entropy loss for one validation batch and log it as "val_loss".
+
+        Raises:
+            ValueError: If the labels tensor of the batch has no elements.
+        """
+        features, labels = batch
+        if labels.numel() == 0:
+            raise ValueError("empty validation batch; check per-site data partitioning")
+        loss = F.cross_entropy(self(features), labels)
+        self.log("val_loss", loss)
+
+    def configure_optimizers(self):
+        """Return an SGD optimizer over all module parameters using the saved lr."""
+        return torch.optim.SGD(self.parameters(), lr=self.hparams.lr)
diff --git a/dev_tools/agent/skill_evals/nvflare-convert-lightning/files/hello-lightning/train.py b/dev_tools/agent/skill_evals/nvflare-convert-lightning/files/hello-lightning/train.py
@@ -0,0 +1,36 @@
+# Copyright (c) 2026, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import pytorch_lightning as pl
+import torch
+from model import LitNet
+from torch.utils.data import DataLoader, TensorDataset
+
+
+def make_loader():
+    """Return a DataLoader (batch_size=4) over 8 freshly generated random samples.
+
+    Features come from torch.randn(8, 4); labels are random integers 0 or 1.
+    """
+    features = torch.randn(8, 4)
+    labels = torch.randint(0, 2, (8,))
+    return DataLoader(TensorDataset(features, labels), batch_size=4)
+
+
+def main():
+    """Fit LitNet for one epoch on CPU with random training and validation loaders."""
+    model = LitNet()
+    # Each call generates its own random data, so the two loaders differ.
+    train_loader = make_loader()
+    val_loader = make_loader()
+    trainer = pl.Trainer(max_epochs=1, accelerator="cpu", devices=1, logger=False)
+    trainer.fit(model, train_loader, val_loader)
+
+
+# Run training only when this file is executed as a script.
+if __name__ == "__main__":
+    main()