From 0c294354b775aa95c7a6eb71fcd00e351ce78897 Mon Sep 17 00:00:00 2001 From: Edwin Bernal Date: Tue, 30 Jun 2026 09:05:24 -0700 Subject: [PATCH 1/5] [Bugfix] Address command injection vulnerability in vm-repair extension; validate and quote tag values --- src/vm-repair/HISTORY.rst | 4 + src/vm-repair/azext_vm_repair/custom.py | 22 +- src/vm-repair/azext_vm_repair/repair_utils.py | 61 ++++- .../tests/latest/test_command_injection.py | 209 ++++++++++++++++++ src/vm-repair/setup.py | 2 +- 5 files changed, 291 insertions(+), 7 deletions(-) create mode 100644 src/vm-repair/azext_vm_repair/tests/latest/test_command_injection.py diff --git a/src/vm-repair/HISTORY.rst b/src/vm-repair/HISTORY.rst index 1a331136141..a1e7bdee50a 100644 --- a/src/vm-repair/HISTORY.rst +++ b/src/vm-repair/HISTORY.rst @@ -2,6 +2,10 @@ Release History =============== +2.2.1 +++++++ +Fixing a command injection vulnerability (MSRC 115198 / VULN-185362). Source VM tag values copied via ``--copy-tags`` could contain shell metacharacters that, on Windows, were interpreted by ``cmd.exe`` and executed as arbitrary commands on the operator's workstation. Tag keys and values are now validated and quoted before being interpolated into the ``az`` command, and ``_call_az_command`` quotes every argument so ``cmd.exe`` treats shell metacharacters as literal text. Minimum fixed version: 2.2.1. + 2.2.0 ++++++ Adding `--tags` parameter to `vm repair create` and `vm repair repair-and-restore` commands to allow users to tag the repair VM for organizational requirements diff --git a/src/vm-repair/azext_vm_repair/custom.py b/src/vm-repair/azext_vm_repair/custom.py index c8147a5547b..d4b09773609 100644 --- a/src/vm-repair/azext_vm_repair/custom.py +++ b/src/vm-repair/azext_vm_repair/custom.py @@ -5,6 +5,7 @@ # pylint: disable=line-too-long, too-many-locals, too-many-statements, broad-except, too-many-branches import json +import shlex import timeit import traceback import requests @@ -14,6 +15,7 @@ from azure.cli.command_modules.vm.custom import get_vm, _is_linux_os from azure.cli.command_modules.storage.storage_url_helpers import StorageResourceIdentifier from azure.mgmt.core.tools import parse_resource_id +from azure.cli.core.azclierror import InvalidArgumentValueError from .exceptions import AzCommandError, SkuNotAvailableError, UnmanagedDiskCopyError, WindowsOsNotAvailableError, RunScriptNotFoundForIdError, SkuDoesNotSupportHyperV, ScriptReturnsError, SupportingResourceNotFoundError, CommandCanceledByUserError from .command_helper_class import command_helper @@ -136,8 +138,26 @@ def create(cmd, vm_name, resource_group_name, repair_password=None, repair_usern if sep: merged_tags[repair_key] = repair_value + # Validate tag keys and values before they are placed into the command string. + # Tag values can originate from the source VM (via --copy-tags) and are therefore + # untrusted. Reject characters that cannot be safely represented as a single shell + # argument (control characters and double quotes); every other character, including + # shell metacharacters such as & | < >, is preserved and passed through literally. + # See MSRC 115198 / VULN-185362. + for tag_key, tag_value in merged_tags.items(): + for tag_field in (str(tag_key), str(tag_value)): + if any(unsafe_char in tag_field for unsafe_char in ('"', '\n', '\r', '\x00')): + raise InvalidArgumentValueError( + f'Tag keys and values must not contain double quotes or control characters. Offending tag: {tag_key}={tag_value}' + ) + # Convert to CLI string for passing to az cli later. - tag_string = ' '.join(f'{k}={v}' for k, v in merged_tags.items()) + # Each key=value token is quoted with shlex.quote so values containing spaces or + # shell metacharacters survive re-tokenization in _call_az_command as a single + # argument. Combined with the Windows cmd.exe quoting in _call_az_command, this + # prevents command injection through attacker-controlled source VM tags. + # See MSRC 115198 / VULN-185362. + tag_string = ' '.join(shlex.quote(f'{tag_key}={tag_value}') for tag_key, tag_value in merged_tags.items()) # initializing the list of created resources. created_resources = [] diff --git a/src/vm-repair/azext_vm_repair/repair_utils.py b/src/vm-repair/azext_vm_repair/repair_utils.py index b307c11f624..8033fb2c2aa 100644 --- a/src/vm-repair/azext_vm_repair/repair_utils.py +++ b/src/vm-repair/azext_vm_repair/repair_utils.py @@ -60,6 +60,38 @@ def _is_gen2(vm): return 1 +def _quote_cmd_arg(arg): + """ + Quote a single argument for safe use on a Windows 'cmd /c' command line. + + The argument is always wrapped in double quotes so that cmd.exe treats shell + metacharacters such as & | < > ( ) ^ as literal text instead of operators. + Embedded double quotes and any backslashes that precede them are escaped using + the Windows CommandLineToArgvW convention so the receiving program parses the + original value. This prevents command injection from untrusted values (for + example source VM tags) that are interpolated into the command string. + See MSRC 115198 / VULN-185362. + """ + result = '"' + backslash_count = 0 + for char in arg: + if char == '\\': + backslash_count += 1 + elif char == '"': + # Double the backslashes that precede the quote, then escape the quote. + result += '\\' * (backslash_count * 2 + 1) + result += '"' + backslash_count = 0 + else: + result += '\\' * backslash_count + result += char + backslash_count = 0 + # Double any trailing backslashes so they do not escape the closing quote. + result += '\\' * (backslash_count * 2) + result += '"' + return result + + def _call_az_command(command_string, run_async=False, secure_params=None): """ Uses subprocess to run a command string. To hide sensitive parameters from logs, add the @@ -72,10 +104,6 @@ def _call_az_command(command_string, run_async=False, secure_params=None): # If command does not start with 'az' then raise exception if not tokenized_command or tokenized_command[0] != 'az': raise AzCommandError("The command string is not an 'az' command!") - # If run on windows, add 'cmd /c' - windows_os_name = 'nt' - if os.name == windows_os_name: - tokenized_command = ['cmd', '/c'] + tokenized_command # Hide sensitive data such as passwords from logs if secure_params: @@ -83,7 +111,30 @@ def _call_az_command(command_string, run_async=False, secure_params=None): if param: command_string = command_string.replace(param, '********') logger.debug("Calling: %s", command_string) - process = subprocess.Popen(tokenized_command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True) + + # On Windows, 'az' resolves to a batch file (az.cmd) so the call must be launched + # through cmd.exe. Handing the tokenized list to subprocess would let cmd.exe + # re-interpret shell metacharacters: subprocess.list2cmdline only quotes tokens that + # contain whitespace, so a token such as 'env=ok&echo' would reach cmd.exe unquoted + # and the '&' would be parsed as a command separator. To prevent command injection + # from untrusted interpolated values (for example source VM tags), build the command + # line explicitly and wrap every token in double quotes so cmd.exe treats + # metacharacters as literal text. + # + # The whole command is additionally wrapped in one outer pair of quotes and invoked + # with 'cmd /s /c "..."'. Without '/s', cmd.exe strips the first and last quote on the + # line (its documented /c behavior), which would unbalance the quoting around the final + # argument and re-expose metacharacters. With '/s' and a leading+trailing quote, cmd.exe + # strips exactly those outer quotes and parses the remainder verbatim, keeping every + # per-token quote balanced. See MSRC 115198 / VULN-185362. + windows_os_name = 'nt' + if os.name == windows_os_name: + quoted_command = ' '.join(_quote_cmd_arg(token) for token in tokenized_command) + command_to_run = 'cmd /s /c "' + quoted_command + '"' + else: + command_to_run = tokenized_command + + process = subprocess.Popen(command_to_run, stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True) # Wait for process to terminate and fetch stdout and stderror if not run_async: diff --git a/src/vm-repair/azext_vm_repair/tests/latest/test_command_injection.py b/src/vm-repair/azext_vm_repair/tests/latest/test_command_injection.py new file mode 100644 index 00000000000..9924e3adeea --- /dev/null +++ b/src/vm-repair/azext_vm_repair/tests/latest/test_command_injection.py @@ -0,0 +1,209 @@ +# -------------------------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See License.txt in the project root for license information. +# -------------------------------------------------------------------------------------------- + +# Unit regression tests for the command-injection hardening of the vm-repair extension. +# An attacker holding only 'Microsoft.Resources/tags/write' on a source VM could store a +# tag value containing cmd.exe metacharacters. When an operator ran +# 'az vm repair create --copy-tags' on Windows, the unescaped tag value was interpolated +# into a command string and executed through 'cmd /c', resulting in remote code execution +# on the operator's workstation. See MSRC 115198 / VULN-185362. + +import os +import shlex +import subprocess +import sys +from unittest import mock + +import pytest + +from azext_vm_repair.repair_utils import _call_az_command, _quote_cmd_arg + + +class _FakeProcess: + """Minimal stand-in for a subprocess.Popen object.""" + + def __init__(self, returncode=0, stdout='', stderr=''): + self.returncode = returncode + self._stdout = stdout + self._stderr = stderr + + def communicate(self): + return self._stdout, self._stderr + + +# cmd.exe metacharacters that can change command parsing / cause command injection. +CMD_METACHARACTERS = ['&', '|', '<', '>', '^', '(', ')', '&&', '||'] + + +@pytest.mark.parametrize('meta', CMD_METACHARACTERS) +def test_quote_cmd_arg_wraps_metacharacters_in_quotes(meta): + value = 'env=ok{meta}calc'.format(meta=meta) + quoted = _quote_cmd_arg(value) + # The whole token must be wrapped in double quotes so cmd.exe treats the + # metacharacter as literal text rather than a shell operator. + assert quoted == '"env=ok{meta}calc"'.format(meta=meta) + assert quoted.startswith('"') + assert quoted.endswith('"') + + +def test_quote_cmd_arg_escapes_embedded_quote(): + # An embedded double quote is escaped per the CommandLineToArgvW convention. + assert _quote_cmd_arg('a"b') == '"a\\"b"' + + +def test_quote_cmd_arg_doubles_trailing_backslashes(): + # Trailing backslashes are doubled so they cannot escape the closing quote. + assert _quote_cmd_arg('path\\') == '"path\\\\"' + + +def test_quote_cmd_arg_preserves_normal_path(): + assert _quote_cmd_arg('C:\\Users\\Public\\file.txt') == '"C:\\Users\\Public\\file.txt"' + + +def test_quote_cmd_arg_empty_string(): + # An empty argument must still be emitted as an explicit empty quoted token. + assert _quote_cmd_arg('') == '""' + + +def test_quote_cmd_arg_space_only(): + assert _quote_cmd_arg(' ') == '" "' + + +def test_quote_cmd_arg_value_with_spaces(): + assert _quote_cmd_arg('hello world') == '"hello world"' + + +def test_quote_cmd_arg_single_trailing_backslash_is_doubled(): + # One trailing backslash is doubled so it cannot escape the closing quote. + assert _quote_cmd_arg('a\\') == '"a' + '\\' * 2 + '"' + + +def test_quote_cmd_arg_multiple_trailing_backslashes_are_doubled(): + # Two trailing backslashes become four. + assert _quote_cmd_arg('a\\\\') == '"a' + '\\' * 4 + '"' + + +def test_quote_cmd_arg_backslash_before_quote(): + # A backslash that precedes a quote: 2*n+1 backslashes, then the escaped quote. + assert _quote_cmd_arg('a\\"b') == '"a' + '\\' * 3 + '"' + 'b"' + + +def test_quote_cmd_arg_internal_backslash_not_doubled(): + # A backslash that does not precede a quote stays single. + assert _quote_cmd_arg('a\\b') == '"a\\b"' + + +@mock.patch('azext_vm_repair.repair_utils.subprocess.Popen') +def test_call_az_command_windows_quotes_each_token(mock_popen): + mock_popen.return_value = _FakeProcess(returncode=0, stdout='ok') + payload = 'az vm create --tags env=ok&echo pwned>file.txt&rem' + with mock.patch('azext_vm_repair.repair_utils.os.name', 'nt'): + _call_az_command(payload) + + # On Windows the command must be passed as a single string launched through + # 'cmd /s /c "..."'. The '/s' plus the single outer pair of quotes guarantees cmd.exe + # strips exactly the outer quotes and parses the remainder with every per-token quote + # balanced, so the last argument's closing quote is never removed. + command_line = mock_popen.call_args[0][0] + assert isinstance(command_line, str) + assert command_line.startswith('cmd /s /c "') + assert command_line.endswith('"') + # The metacharacter-laden tokens must each be wrapped in double quotes. + assert '"env=ok&echo"' in command_line + assert '"pwned>file.txt&rem"' in command_line + + +@mock.patch('azext_vm_repair.repair_utils.subprocess.Popen') +def test_call_az_command_posix_passes_argument_list(mock_popen): + mock_popen.return_value = _FakeProcess(returncode=0, stdout='ok') + with mock.patch('azext_vm_repair.repair_utils.os.name', 'posix'): + _call_az_command('az vm create --tags env=ok&echo') + + # On POSIX the tokenized list is handed straight to subprocess with no shell, so the + # '&' is a literal character inside a single argument and cannot be interpreted. + command_args = mock_popen.call_args[0][0] + assert isinstance(command_args, list) + assert command_args[0] == 'az' + assert 'env=ok&echo' in command_args + + +@mock.patch('azext_vm_repair.repair_utils.subprocess.Popen') +def test_malicious_tag_is_neutralized_end_to_end(mock_popen): + # Reproduces the submitted exploit payload through the same two-stage pipeline used + # by custom.create(): the tag is quoted with shlex.quote when the command string is + # built, then re-tokenized and cmd-quoted inside _call_az_command on Windows. + mock_popen.return_value = _FakeProcess(returncode=0, stdout='ok') + malicious_value = 'ok&echo P2ADDR>C:/Users/Public/RCE_PROOF.txt&rem' + tag_token = shlex.quote('env={value}'.format(value=malicious_value)) + command = 'az vm create -g rg -n vm --tags {tag}'.format(tag=tag_token) + + with mock.patch('azext_vm_repair.repair_utils.os.name', 'nt'): + _call_az_command(command) + + command_line = mock_popen.call_args[0][0] + assert isinstance(command_line, str) + # The entire tag, including '&' and '>', must survive as a single quoted token so + # cmd.exe never sees an unquoted command separator or redirection operator. + assert '"env={value}"'.format(value=malicious_value) in command_line + + +@mock.patch('azext_vm_repair.repair_utils.subprocess.Popen') +def test_call_az_command_rejects_non_az_command(mock_popen): + from azext_vm_repair.exceptions import AzCommandError + with pytest.raises(AzCommandError): + _call_az_command('notaz vm create') + mock_popen.assert_not_called() + + +# Strings that must survive a real cmd.exe parse as a single literal argument without +# triggering command execution. Includes cmd.exe metacharacters and POSIX-shell payloads. +ROUNDTRIP_PAYLOADS = [ + 'safe', + 'a&b', + 'a|b', + 'a>b', + 'aRCE_PROOF.txt&rem', + 'a;b', + '$(whoami)', + '`whoami`', +] + + +@pytest.mark.skipif(os.name != 'nt', reason='cmd.exe quoting is Windows-specific') +@pytest.mark.parametrize('payload', ROUNDTRIP_PAYLOADS) +def test_quote_cmd_arg_roundtrip_through_real_cmd(payload): + # Strongest guard: build the SAME 'cmd /s /c "..."' string the production Windows path + # builds and execute it through a real cmd.exe, using a tiny python program as the + # target that echoes argv[1]. If any metacharacter were interpreted by cmd.exe, the + # payload would not be returned verbatim (or an injected command would run), so an + # exact-match stdout proves the value is passed literally and safely. + target = 'import sys; sys.stdout.write(sys.argv[1])' + tokens = [sys.executable, '-c', target, payload] + command_line = 'cmd /s /c "' + ' '.join(_quote_cmd_arg(t) for t in tokens) + '"' + completed = subprocess.run(command_line, capture_output=True, text=True) + assert completed.stdout == payload + assert completed.returncode == 0 + + +@pytest.mark.skipif(os.name != 'nt', reason='cmd.exe quoting is Windows-specific') +def test_no_command_injection_through_real_cmd(tmp_path): + # End-to-end injection probe: the payload tries to write a marker file via '&echo'. + # If cmd.exe interpreted the '&', the marker would be created. The production + # construction must keep it inside quotes so the marker is never written. + marker = tmp_path / 'INJECTED.txt' + payload = 'safe&echo boom>"{marker}"&rem'.format(marker=marker) + target = 'import sys; sys.stdout.write(sys.argv[1])' + tokens = [sys.executable, '-c', target, payload] + command_line = 'cmd /s /c "' + ' '.join(_quote_cmd_arg(t) for t in tokens) + '"' + completed = subprocess.run(command_line, capture_output=True, text=True) + assert not marker.exists(), 'command injection occurred: marker file was created' + assert completed.stdout == payload diff --git a/src/vm-repair/setup.py b/src/vm-repair/setup.py index 8fa357755d5..cedf28498cb 100644 --- a/src/vm-repair/setup.py +++ b/src/vm-repair/setup.py @@ -8,7 +8,7 @@ from codecs import open from setuptools import setup, find_packages -VERSION = "2.2.0" +VERSION = "2.2.1" CLASSIFIERS = [ 'Development Status :: 4 - Beta', From 1c2da79ee1be5f19817ec390d94efcab72159fa8 Mon Sep 17 00:00:00 2001 From: Edwin Bernal Date: Wed, 1 Jul 2026 17:06:37 -0700 Subject: [PATCH 2/5] Fix tag validation to disallow control characters and double quotes --- src/vm-repair/azext_vm_repair/custom.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/vm-repair/azext_vm_repair/custom.py b/src/vm-repair/azext_vm_repair/custom.py index d4b09773609..546960f6984 100644 --- a/src/vm-repair/azext_vm_repair/custom.py +++ b/src/vm-repair/azext_vm_repair/custom.py @@ -146,7 +146,7 @@ def create(cmd, vm_name, resource_group_name, repair_password=None, repair_usern # See MSRC 115198 / VULN-185362. for tag_key, tag_value in merged_tags.items(): for tag_field in (str(tag_key), str(tag_value)): - if any(unsafe_char in tag_field for unsafe_char in ('"', '\n', '\r', '\x00')): + if '"' in tag_field or any(ord(ch) < 32 or ord(ch) == 127 for ch in tag_field): raise InvalidArgumentValueError( f'Tag keys and values must not contain double quotes or control characters. Offending tag: {tag_key}={tag_value}' ) From 3ac262e88e4ce3788024cc09ac58a72b99994b82 Mon Sep 17 00:00:00 2001 From: Edwin Bernal Date: Wed, 1 Jul 2026 17:34:21 -0700 Subject: [PATCH 3/5] Add assertion for successful command execution in command injection test --- .../azext_vm_repair/tests/latest/test_command_injection.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/vm-repair/azext_vm_repair/tests/latest/test_command_injection.py b/src/vm-repair/azext_vm_repair/tests/latest/test_command_injection.py index 9924e3adeea..a79bee0d84b 100644 --- a/src/vm-repair/azext_vm_repair/tests/latest/test_command_injection.py +++ b/src/vm-repair/azext_vm_repair/tests/latest/test_command_injection.py @@ -207,3 +207,4 @@ def test_no_command_injection_through_real_cmd(tmp_path): completed = subprocess.run(command_line, capture_output=True, text=True) assert not marker.exists(), 'command injection occurred: marker file was created' assert completed.stdout == payload + assert completed.returncode == 0 From c9fdf88437e0ef9c9f70f0c3bd63b80be765d2b1 Mon Sep 17 00:00:00 2001 From: Edwin Bernal Date: Wed, 1 Jul 2026 18:16:06 -0700 Subject: [PATCH 4/5] Enhance tag validation to disallow double quotes, percent signs, exclamation marks, and control characters --- src/vm-repair/azext_vm_repair/custom.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/src/vm-repair/azext_vm_repair/custom.py b/src/vm-repair/azext_vm_repair/custom.py index 546960f6984..46484059efe 100644 --- a/src/vm-repair/azext_vm_repair/custom.py +++ b/src/vm-repair/azext_vm_repair/custom.py @@ -141,14 +141,18 @@ def create(cmd, vm_name, resource_group_name, repair_password=None, repair_usern # Validate tag keys and values before they are placed into the command string. # Tag values can originate from the source VM (via --copy-tags) and are therefore # untrusted. Reject characters that cannot be safely represented as a single shell - # argument (control characters and double quotes); every other character, including - # shell metacharacters such as & | < >, is preserved and passed through literally. - # See MSRC 115198 / VULN-185362. + # argument: double quotes and control characters (which break tokenization), plus + # '%' and '!' which cmd.exe expands as environment / delayed-expansion variables + # even inside double quotes. There is no reliable way to escape '%' on a 'cmd /c' + # command line -- a leading '^' survives as a literal caret and corrupts the value -- + # so these characters are rejected here at the boundary. Every other character, + # including shell metacharacters such as & | < >, is preserved and passed through + # literally. See MSRC 115198 / VULN-185362. for tag_key, tag_value in merged_tags.items(): for tag_field in (str(tag_key), str(tag_value)): - if '"' in tag_field or any(ord(ch) < 32 or ord(ch) == 127 for ch in tag_field): + if any(ch in tag_field for ch in ('"', '%', '!')) or any(ord(ch) < 32 or ord(ch) == 127 for ch in tag_field): raise InvalidArgumentValueError( - f'Tag keys and values must not contain double quotes or control characters. Offending tag: {tag_key}={tag_value}' + f'Tag keys and values must not contain double quotes, percent signs, exclamation marks, or control characters. Offending tag: {tag_key}={tag_value}' ) # Convert to CLI string for passing to az cli later. From 5d31bab8f59f54e947d821f062ee7fa384219785 Mon Sep 17 00:00:00 2001 From: Edwin Bernal Date: Thu, 2 Jul 2026 04:16:14 +0000 Subject: [PATCH 5/5] Implement tag validation to reject unsafe characters in command inputs --- src/vm-repair/azext_vm_repair/custom.py | 21 ++--- src/vm-repair/azext_vm_repair/repair_utils.py | 25 +++++- .../tests/latest/test_command_injection.py | 88 ++++++++++++++++++- 3 files changed, 117 insertions(+), 17 deletions(-) diff --git a/src/vm-repair/azext_vm_repair/custom.py b/src/vm-repair/azext_vm_repair/custom.py index 46484059efe..6e2bd064f43 100644 --- a/src/vm-repair/azext_vm_repair/custom.py +++ b/src/vm-repair/azext_vm_repair/custom.py @@ -15,7 +15,6 @@ from azure.cli.command_modules.vm.custom import get_vm, _is_linux_os from azure.cli.command_modules.storage.storage_url_helpers import StorageResourceIdentifier from azure.mgmt.core.tools import parse_resource_id -from azure.cli.core.azclierror import InvalidArgumentValueError from .exceptions import AzCommandError, SkuNotAvailableError, UnmanagedDiskCopyError, WindowsOsNotAvailableError, RunScriptNotFoundForIdError, SkuDoesNotSupportHyperV, ScriptReturnsError, SupportingResourceNotFoundError, CommandCanceledByUserError from .command_helper_class import command_helper @@ -26,6 +25,7 @@ _fetch_compatible_sku, _list_resource_ids_in_rg, _get_repair_resource_tag, + _validate_tags_for_command, _fetch_compatible_windows_os_urn, _fetch_matching_windows_os_urn, _fetch_run_script_map, @@ -140,20 +140,11 @@ def create(cmd, vm_name, resource_group_name, repair_password=None, repair_usern # Validate tag keys and values before they are placed into the command string. # Tag values can originate from the source VM (via --copy-tags) and are therefore - # untrusted. Reject characters that cannot be safely represented as a single shell - # argument: double quotes and control characters (which break tokenization), plus - # '%' and '!' which cmd.exe expands as environment / delayed-expansion variables - # even inside double quotes. There is no reliable way to escape '%' on a 'cmd /c' - # command line -- a leading '^' survives as a literal caret and corrupts the value -- - # so these characters are rejected here at the boundary. Every other character, - # including shell metacharacters such as & | < >, is preserved and passed through - # literally. See MSRC 115198 / VULN-185362. - for tag_key, tag_value in merged_tags.items(): - for tag_field in (str(tag_key), str(tag_value)): - if any(ch in tag_field for ch in ('"', '%', '!')) or any(ord(ch) < 32 or ord(ch) == 127 for ch in tag_field): - raise InvalidArgumentValueError( - f'Tag keys and values must not contain double quotes, percent signs, exclamation marks, or control characters. Offending tag: {tag_key}={tag_value}' - ) + # untrusted. _validate_tags_for_command rejects double quotes, control characters and + # the cmd.exe expansion characters '%' and '!' (which cannot be safely escaped on a + # 'cmd /c' command line); every other character, including shell metacharacters such + # as & | < >, is preserved and passed through literally. See MSRC 115198 / VULN-185362. + _validate_tags_for_command(merged_tags) # Convert to CLI string for passing to az cli later. # Each key=value token is quoted with shlex.quote so values containing spaces or diff --git a/src/vm-repair/azext_vm_repair/repair_utils.py b/src/vm-repair/azext_vm_repair/repair_utils.py index 8033fb2c2aa..dad85d04817 100644 --- a/src/vm-repair/azext_vm_repair/repair_utils.py +++ b/src/vm-repair/azext_vm_repair/repair_utils.py @@ -18,7 +18,7 @@ from .encryption_types import Encryption from .exceptions import (AzCommandError, WindowsOsNotAvailableError, RunScriptNotFoundForIdError, SkuDoesNotSupportHyperV, SkuNotAvailableError) -from azure.cli.core.azclierror import CLIError +from azure.cli.core.azclierror import CLIError, InvalidArgumentValueError REPAIR_MAP_URL = 'https://raw.githubusercontent.com/Azure/repair-script-library/master/map.json' @@ -92,6 +92,29 @@ def _quote_cmd_arg(arg): return result +# Characters that cannot be safely carried through a Windows 'cmd /c' command line when +# interpolated from an untrusted tag value (for example a source VM tag copied via +# --copy-tags). Double quotes and ASCII control characters break argument tokenization, +# and '%' / '!' are expanded by cmd.exe as environment / delayed-expansion variables even +# inside double quotes -- '%' in particular cannot be reliably escaped on a 'cmd /c' line +# (a leading '^' is preserved as a literal caret and corrupts the value). Such characters +# are therefore rejected at the boundary rather than escaped. See MSRC 115198 / VULN-185362. +def _validate_tags_for_command(merged_tags): + """ + Reject tag keys and values that contain characters which are unsafe to interpolate + into the 'az' command string. Raises InvalidArgumentValueError on the first offending + key or value; returns None when every tag is safe. + """ + for tag_key, tag_value in merged_tags.items(): + for tag_field in (str(tag_key), str(tag_value)): + if any(unsafe_char in tag_field for unsafe_char in ('"', '%', '!')) or \ + any(ord(ch) < 32 or ord(ch) == 127 for ch in tag_field): + raise InvalidArgumentValueError( + f'Tag keys and values must not contain double quotes, percent signs, ' + f'exclamation marks, or control characters. Offending tag: {tag_key}={tag_value}' + ) + + def _call_az_command(command_string, run_async=False, secure_params=None): """ Uses subprocess to run a command string. To hide sensitive parameters from logs, add the diff --git a/src/vm-repair/azext_vm_repair/tests/latest/test_command_injection.py b/src/vm-repair/azext_vm_repair/tests/latest/test_command_injection.py index a79bee0d84b..7e206b7df60 100644 --- a/src/vm-repair/azext_vm_repair/tests/latest/test_command_injection.py +++ b/src/vm-repair/azext_vm_repair/tests/latest/test_command_injection.py @@ -18,7 +18,9 @@ import pytest -from azext_vm_repair.repair_utils import _call_az_command, _quote_cmd_arg +from azure.cli.core.azclierror import InvalidArgumentValueError + +from azext_vm_repair.repair_utils import _call_az_command, _quote_cmd_arg, _validate_tags_for_command class _FakeProcess: @@ -208,3 +210,87 @@ def test_no_command_injection_through_real_cmd(tmp_path): assert not marker.exists(), 'command injection occurred: marker file was created' assert completed.stdout == payload assert completed.returncode == 0 + + +# --------------------------------------------------------------------------- +# Tag validation (_validate_tags_for_command) +# +# Tag keys/values reach the command string from an untrusted source (a source VM +# tag copied via --copy-tags). Characters that cannot be safely carried through a +# Windows 'cmd /c' line must be rejected at the boundary. '%' and '!' are especially +# important: cmd.exe expands them as environment / delayed-expansion variables even +# inside double quotes, and '%' cannot be reliably escaped. See MSRC 115198. +# --------------------------------------------------------------------------- + +# Characters that must be rejected in either a tag key or a tag value. +UNSAFE_TAG_CHARS = [ + '"', # breaks the shlex / cmd.exe quoting + '%', # cmd.exe environment-variable expansion (e.g. %USERNAME%) + '!', # cmd.exe delayed expansion when 'cmd /v:on' is enabled + '\n', # control character + '\r', # control character + '\x00', # NUL + '\t', # tab (C0 control) + '\x1b', # ESC / terminal-control sequence introducer + '\x07', # BEL + '\x7f', # DEL +] + + +@pytest.mark.parametrize('bad_char', UNSAFE_TAG_CHARS) +def test_validate_tags_rejects_unsafe_value(bad_char): + with pytest.raises(InvalidArgumentValueError): + _validate_tags_for_command({'env': 'ok{bad}'.format(bad=bad_char)}) + + +@pytest.mark.parametrize('bad_char', UNSAFE_TAG_CHARS) +def test_validate_tags_rejects_unsafe_key(bad_char): + # A malicious character in the key must be rejected just like one in the value. + with pytest.raises(InvalidArgumentValueError): + _validate_tags_for_command({'key{bad}'.format(bad=bad_char): 'value'}) + + +def test_validate_tags_rejects_percent_expansion_payload(): + # The concrete information-leak vector: %USERNAME% would be expanded by cmd.exe to the + # operator's local username even inside double quotes, so it must be rejected. + with pytest.raises(InvalidArgumentValueError): + _validate_tags_for_command({'owner': 'env=%USERNAME%'}) + + +# Shell metacharacters that are NOT expanded/interpreted once wrapped in double quotes by +# _quote_cmd_arg. These must be preserved (passed through literally), not rejected. +ALLOWED_TAG_VALUES = [ + 'plain', + 'owner=R&D', + 'a|b', + 'ac', + 'path C:\\Users\\Public', + 'a(b)c', + 'a^b', + 'ok&echo', + 'semi;colon', + '$(whoami)', + '`whoami`', + 'space separated value', +] + + +@pytest.mark.parametrize('good_value', ALLOWED_TAG_VALUES) +def test_validate_tags_allows_safe_values(good_value): + # Must not raise: these characters survive as literal text through the two-stage + # shlex.quote + _quote_cmd_arg pipeline, so rejecting them would be an unnecessary + # regression in functionality. + _validate_tags_for_command({'tag': good_value}) + + +def test_validate_tags_empty_dict_is_allowed(): + # No tags is trivially safe. + _validate_tags_for_command({}) + + +def test_validate_tags_error_message_names_offending_tag(): + with pytest.raises(InvalidArgumentValueError) as exc_info: + _validate_tags_for_command({'owner': 'bad%value'}) + message = str(exc_info.value) + assert 'owner' in message + assert 'bad%value' in message