From 22fab24e4290f909d606e50ff583374a1f055ad5 Mon Sep 17 00:00:00 2001 From: YuanTingHsieh Date: Thu, 2 Jul 2026 09:35:07 -0700 Subject: [PATCH 1/2] Write Client API config files with owner-only permissions EP-1 (Wave 0) of the Client API Execution Modes program (see docs/design/client_api_execution_modes_plan.md on PR #4853). client_api_config.json embeds live AUTH_TOKEN/AUTH_TOKEN_SIGNATURE and was written with the default umask - world-readable on most systems. ClientConfig.to_json (the single choke point for every writer: write_config_to_file, ClientAPILauncherExecutor.prepare_config_for_launch, ExternalConfigurator) now creates config files 0600 via os.open and tightens pre-existing files with chmod before rewrite. chmod failures degrade to a warning so limited-permission platforms (Windows) never crash. Behavior note: deployments that relied on a different OS user reading client_api_config.json will need explicit permission management - by design per the bootstrap-config protection contract (design doc Appendix B). Co-Authored-By: Claude Fable 5 --- .../3rd_party_integration.rst | 8 ++ nvflare/client/config.py | 32 ++++- tests/unit_test/client/config_test.py | 117 ++++++++++++++++++ 3 files changed, 156 insertions(+), 1 deletion(-) create mode 100644 tests/unit_test/client/config_test.py diff --git a/docs/programming_guide/execution_api_type/3rd_party_integration.rst b/docs/programming_guide/execution_api_type/3rd_party_integration.rst index cb71530c7a..30f0a159d3 100644 --- a/docs/programming_guide/execution_api_type/3rd_party_integration.rst +++ b/docs/programming_guide/execution_api_type/3rd_party_integration.rst @@ -305,6 +305,14 @@ setup the trainer process on each client site: - For the Client API pattern, point ``flare.init()`` to the generated client API config in the job workspace and use ``receive/send`` in your trainer loop. + .. note:: + The generated client API config carries live authentication material and, + on POSIX systems, is written owner-only (mode ``0600``). An externally + started trainer must therefore run as the **same OS user** as the FL client + process, or the operator must explicitly re-permission the file for the + trainer's account. On Windows the file mode does not restrict NTFS ACLs; + protect the workspace directory via ACLs instead. + Verification ============ diff --git a/nvflare/client/config.py b/nvflare/client/config.py index 97c6e9cd0e..6244393b17 100644 --- a/nvflare/client/config.py +++ b/nvflare/client/config.py @@ -27,6 +27,9 @@ from nvflare.fuel.utils.config_factory import ConfigFactory from nvflare.fuel.utils.log_utils import get_obj_logger +# The Client API config can carry auth material, so persisted copies must be owner-only. +CONFIG_FILE_PERMISSION = 0o600 + class ExchangeFormat(str, Enum): RAW = "raw" @@ -274,7 +277,34 @@ def get_auth_token_signature(self): return self.config.get(FLMetaKey.AUTH_TOKEN_SIGNATURE) def to_json(self, config_file: str): - with open(config_file, "w") as f: + # The config may carry live auth material (e.g. AUTH_TOKEN / AUTH_TOKEN_SIGNATURE). + # On POSIX it must be readable by the owner only (0600); this is enforced with + # fchmod on the open descriptor so it applies whether the file is newly created + # or pre-existing (an O_CREAT mode argument only takes effect on creation), and + # the write is refused rather than exposing secrets if the mode cannot be set. + # O_NOFOLLOW rejects a planted symlink at config_file. On Windows POSIX modes do + # not map to NTFS ACLs, so this provides no read protection there — directory + # ACLs must be relied on instead. + flags = os.O_WRONLY | os.O_CREAT | os.O_TRUNC + if hasattr(os, "O_NOFOLLOW"): + flags |= os.O_NOFOLLOW + fd = os.open(config_file, flags, CONFIG_FILE_PERMISSION) + try: + if os.name == "posix": + try: + os.fchmod(fd, CONFIG_FILE_PERMISSION) + except OSError as e: + # Fail closed: do not write credentials into a file we cannot secure + # (e.g. a pre-existing world-readable file owned by another user). + raise RuntimeError( + f"cannot restrict {config_file} to owner-only ({oct(CONFIG_FILE_PERMISSION)}); " + f"refusing to write Client API config: {e}" + ) from e + f = os.fdopen(fd, "w") + except BaseException: + os.close(fd) + raise + with f: json.dump(self.config, f, indent=2) diff --git a/tests/unit_test/client/config_test.py b/tests/unit_test/client/config_test.py new file mode 100644 index 0000000000..8512ea5df7 --- /dev/null +++ b/tests/unit_test/client/config_test.py @@ -0,0 +1,117 @@ +# Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import json +import os +import stat + +import pytest + +from nvflare.client.config import CONFIG_FILE_PERMISSION, ClientConfig, from_file, write_config_to_file + + +def _mode(path) -> int: + return stat.S_IMODE(os.stat(path).st_mode) + + +@pytest.fixture +def restore_umask(): + """Pin umask so fresh-write permission assertions are deterministic.""" + old = os.umask(0o022) + yield + os.umask(old) + + +@pytest.mark.skipif(os.name != "posix", reason="POSIX file permissions required") +class TestConfigFilePermissions: + def test_to_json_fresh_write_is_owner_only(self, tmp_path, restore_umask): + config_file = str(tmp_path / "client_api_config.json") + ClientConfig({"SITE_NAME": "site-1"}).to_json(config_file) + + assert _mode(config_file) == CONFIG_FILE_PERMISSION + with open(config_file) as f: + assert json.load(f) == {"SITE_NAME": "site-1"} + + def test_write_config_to_file_fresh_write_is_owner_only(self, tmp_path, restore_umask): + config_file = str(tmp_path / "client_api_config.json") + write_config_to_file(config_data={"AUTH_TOKEN": "secret"}, config_file_path=config_file) + + assert _mode(config_file) == CONFIG_FILE_PERMISSION + + def test_write_config_to_file_tightens_pre_existing_world_readable_file(self, tmp_path, restore_umask): + config_file = str(tmp_path / "client_api_config.json") + with open(config_file, "w") as f: + json.dump({"SITE_NAME": "site-1"}, f) + os.chmod(config_file, 0o644) + assert _mode(config_file) == 0o644 + + write_config_to_file(config_data={"AUTH_TOKEN": "secret"}, config_file_path=config_file) + + assert _mode(config_file) == CONFIG_FILE_PERMISSION + # update-in-place must merge with the existing content + config = from_file(config_file) + assert config.config == {"SITE_NAME": "site-1", "AUTH_TOKEN": "secret"} + + def test_to_json_tightens_pre_existing_world_readable_file(self, tmp_path, restore_umask): + config_file = str(tmp_path / "client_api_config.json") + with open(config_file, "w") as f: + f.write("{}") + os.chmod(config_file, 0o644) + + ClientConfig({"AUTH_TOKEN": "secret"}).to_json(config_file) + + assert _mode(config_file) == CONFIG_FILE_PERMISSION + + def test_to_json_fails_closed_when_permission_cannot_be_set(self, tmp_path, monkeypatch): + # If the file cannot be secured (e.g. pre-existing, owned by another user), + # the write must be refused rather than writing the token into an exposed file. + config_file = str(tmp_path / "client_api_config.json") + with open(config_file, "w") as f: + f.write('{"SITE_NAME": "site-1"}') + os.chmod(config_file, 0o644) + + def deny_fchmod(fd, mode): + raise PermissionError("not owner") + + monkeypatch.setattr(os, "fchmod", deny_fchmod) + + with pytest.raises(RuntimeError, match="owner-only"): + ClientConfig({"AUTH_TOKEN": "secret"}).to_json(config_file) + + # the secret must not have been written + with open(config_file) as f: + assert "secret" not in f.read() + + @pytest.mark.skipif(not hasattr(os, "O_NOFOLLOW"), reason="O_NOFOLLOW required") + def test_to_json_rejects_symlink_target(self, tmp_path, restore_umask): + target = tmp_path / "victim.txt" + target.write_text("important") + link = str(tmp_path / "client_api_config.json") + os.symlink(str(target), link) + + with pytest.raises(OSError): + ClientConfig({"AUTH_TOKEN": "secret"}).to_json(link) + + # the symlink target must be untouched + assert target.read_text() == "important" + + +class TestConfigFileContent: + def test_write_config_to_file_round_trip(self, tmp_path): + config_file = str(tmp_path / "client_api_config.json") + write_config_to_file(config_data={"SITE_NAME": "site-1"}, config_file_path=config_file) + write_config_to_file(config_data={"JOB_ID": "job-1"}, config_file_path=config_file) + + config = from_file(config_file) + assert config.config == {"SITE_NAME": "site-1", "JOB_ID": "job-1"} From 8d69cadced8a612844e0e30d361d9500c951caea Mon Sep 17 00:00:00 2001 From: YuanTingHsieh Date: Thu, 2 Jul 2026 11:26:47 -0700 Subject: [PATCH 2/2] EP-1: write config atomically to avoid truncating the original on failure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Review fix (PR #4855): the previous approach opened the target with O_TRUNC before fchmod, so the fail-closed path (fchmod denied) had already wiped the original file to empty — data loss even though no secret was written. Write to a sibling temp file secured with fchmod(0600) before any content, then os.replace() into place. On failure the original file is never touched (no truncation), and a planted symlink at the config path is replaced by a regular owner-only file rather than being written through to its target. Tests updated to assert the original content survives a failed write and that a symlink target is never written through. Co-Authored-By: Claude Fable 5 --- nvflare/client/config.py | 50 ++++++++++++++------------- tests/unit_test/client/config_test.py | 36 ++++++++++++------- 2 files changed, 49 insertions(+), 37 deletions(-) diff --git a/nvflare/client/config.py b/nvflare/client/config.py index 6244393b17..494cccfa54 100644 --- a/nvflare/client/config.py +++ b/nvflare/client/config.py @@ -14,6 +14,7 @@ import json import os +import tempfile from enum import Enum from typing import Dict, Optional @@ -278,34 +279,35 @@ def get_auth_token_signature(self): def to_json(self, config_file: str): # The config may carry live auth material (e.g. AUTH_TOKEN / AUTH_TOKEN_SIGNATURE). - # On POSIX it must be readable by the owner only (0600); this is enforced with - # fchmod on the open descriptor so it applies whether the file is newly created - # or pre-existing (an O_CREAT mode argument only takes effect on creation), and - # the write is refused rather than exposing secrets if the mode cannot be set. - # O_NOFOLLOW rejects a planted symlink at config_file. On Windows POSIX modes do - # not map to NTFS ACLs, so this provides no read protection there — directory - # ACLs must be relied on instead. - flags = os.O_WRONLY | os.O_CREAT | os.O_TRUNC - if hasattr(os, "O_NOFOLLOW"): - flags |= os.O_NOFOLLOW - fd = os.open(config_file, flags, CONFIG_FILE_PERMISSION) + # On POSIX it must be readable by the owner only (0600). Write atomically via a + # sibling temp file that is secured (fchmod 0600) before any content is written, + # then rename into place: on failure the original file is never touched (no + # truncation/data loss), and a planted symlink at config_file is replaced by a + # regular owner-only file rather than being written through to its target. On + # Windows POSIX modes do not map to NTFS ACLs, so read protection there relies on + # directory ACLs; the atomic replace still holds. + config_dir = os.path.dirname(os.path.abspath(config_file)) + fd, tmp_path = tempfile.mkstemp(dir=config_dir, prefix=".client_api_config-", suffix=".tmp") + fd_owned = True # cleared once os.fdopen takes ownership of the descriptor try: if os.name == "posix": - try: - os.fchmod(fd, CONFIG_FILE_PERMISSION) - except OSError as e: - # Fail closed: do not write credentials into a file we cannot secure - # (e.g. a pre-existing world-readable file owned by another user). - raise RuntimeError( - f"cannot restrict {config_file} to owner-only ({oct(CONFIG_FILE_PERMISSION)}); " - f"refusing to write Client API config: {e}" - ) from e - f = os.fdopen(fd, "w") + # mkstemp already creates 0600, but set it explicitly to be robust. + os.fchmod(fd, CONFIG_FILE_PERMISSION) + with os.fdopen(fd, "w") as f: + fd_owned = False # the fdopen wrapper now owns and will close fd + json.dump(self.config, f, indent=2) + os.replace(tmp_path, config_file) except BaseException: - os.close(fd) + # Best-effort cleanup; never leave the temp file behind and never touch the + # original config_file on failure. + if fd_owned: + try: + os.close(fd) + except OSError: + pass + if os.path.exists(tmp_path): + os.remove(tmp_path) raise - with f: - json.dump(self.config, f, indent=2) def from_file(config_file: str): diff --git a/tests/unit_test/client/config_test.py b/tests/unit_test/client/config_test.py index 8512ea5df7..8d3a0d4a2c 100644 --- a/tests/unit_test/client/config_test.py +++ b/tests/unit_test/client/config_test.py @@ -73,38 +73,48 @@ def test_to_json_tightens_pre_existing_world_readable_file(self, tmp_path, resto assert _mode(config_file) == CONFIG_FILE_PERMISSION - def test_to_json_fails_closed_when_permission_cannot_be_set(self, tmp_path, monkeypatch): - # If the file cannot be secured (e.g. pre-existing, owned by another user), - # the write must be refused rather than writing the token into an exposed file. + def test_to_json_fails_closed_and_preserves_original_when_cannot_secure(self, tmp_path, monkeypatch): + # If the temp file cannot be secured (e.g. fchmod denied), the write must be + # refused AND the pre-existing config left intact (the atomic temp+rename never + # touches the original on failure — no truncation/data loss). config_file = str(tmp_path / "client_api_config.json") with open(config_file, "w") as f: f.write('{"SITE_NAME": "site-1"}') - os.chmod(config_file, 0o644) def deny_fchmod(fd, mode): raise PermissionError("not owner") monkeypatch.setattr(os, "fchmod", deny_fchmod) - with pytest.raises(RuntimeError, match="owner-only"): + with pytest.raises(PermissionError): ClientConfig({"AUTH_TOKEN": "secret"}).to_json(config_file) - # the secret must not have been written + # the secret must not have been written, and the original content must survive with open(config_file) as f: - assert "secret" not in f.read() - - @pytest.mark.skipif(not hasattr(os, "O_NOFOLLOW"), reason="O_NOFOLLOW required") - def test_to_json_rejects_symlink_target(self, tmp_path, restore_umask): + content = f.read() + assert "secret" not in content + assert "SITE_NAME" in content + # no temp file left behind + assert not any(name.startswith(".client_api_config-") for name in os.listdir(tmp_path)) + + @pytest.mark.skipif(not hasattr(os, "O_NOFOLLOW"), reason="POSIX symlink semantics required") + def test_to_json_does_not_write_through_symlink(self, tmp_path, restore_umask): + # A planted symlink at the config path must not be written through to its target; + # the atomic replace swaps the symlink for a regular owner-only file instead. target = tmp_path / "victim.txt" target.write_text("important") link = str(tmp_path / "client_api_config.json") os.symlink(str(target), link) - with pytest.raises(OSError): - ClientConfig({"AUTH_TOKEN": "secret"}).to_json(link) + ClientConfig({"AUTH_TOKEN": "secret"}).to_json(link) - # the symlink target must be untouched + # the symlink target is never written through assert target.read_text() == "important" + # the config path is now a regular, owner-only file holding the config + assert not os.path.islink(link) + assert _mode(link) == CONFIG_FILE_PERMISSION + with open(link) as f: + assert "secret" in f.read() class TestConfigFileContent: