diff --git a/cabal.project b/cabal.project
index f35b3b95050..4359520115d 100644
--- a/cabal.project
+++ b/cabal.project
@@ -43,6 +43,7 @@ packages:
   , services/proxy/
   , services/wire-server-enterprise
   , services/spar/
+  , services/memchr-test/
   , tools/db/assets/
   , tools/db/auto-whitelist/
   , tools/db/find-undead/
diff --git a/capstone-scan.py b/capstone-scan.py
new file mode 100755
index 00000000000..936fdf973b6
--- /dev/null
+++ b/capstone-scan.py
@@ -0,0 +1,92 @@
+#!/usr/bin/env python3
+"""Scan an x86-64 ELF binary for instructions from forbidden ISA extensions.
+
+Every executable section is disassembled with Capstone and each instruction is
+checked against FORBIDDEN_GROUPS and FORBIDDEN_MNEMONICS below.
+
+Exit status: 0 = clean, 1 = forbidden instructions found, 2 = usage error or
+the file could not be read/parsed as ELF.
+"""
+import sys
+
+from capstone import CS_ARCH_X86, CS_MODE_64, Cs, x86_const
+from elftools.common.exceptions import ELFError
+from elftools.elf.elffile import ELFFile
+
+# --- Configure your policy here ---
+# Current policy: only AVX-512 is forbidden. Uncomment entries below to also
+# forbid AVX/AVX2, or list mnemonics further down for BMI/ADX and the like.
+FORBIDDEN_GROUPS = {
+    # "AVX": x86_const.X86_GRP_AVX,
+    # "AVX2": x86_const.X86_GRP_AVX2,
+    "AVX512": x86_const.X86_GRP_AVX512,
+    # You can add more depending on what you consider "too new".
+    # Note: Capstone's group coverage is good for SIMD families, less so for every x86 feature.
+}
+
+# Optional: also flag specific mnemonics (useful for BMI1/BMI2 etc. if group coverage isn’t enough)
+FORBIDDEN_MNEMONICS = {
+    # "tzcnt", "lzcnt", "andn", "bextr", "pdep", "pext", "mulx", "adox", "adcx",
+}
+
+# ELF section flag marking a section as containing executable instructions.
+SHF_EXECINSTR = 0x4
+
+
+def iter_exec_sections(elffile):
+    """Yield every non-empty section of *elffile* that is flagged executable."""
+    for sec in elffile.iter_sections():
+        sh = sec.header
+        if (sh["sh_flags"] & SHF_EXECINSTR) and sh["sh_size"] > 0:
+            yield sec
+
+
+def forbidden_reasons(insn):
+    """Return the list of policy violations for one instruction (empty if none)."""
+    reasons = []
+    if insn.mnemonic in FORBIDDEN_MNEMONICS:
+        reasons.append(f"mnemonic:{insn.mnemonic}")
+    for name, grp in FORBIDDEN_GROUPS.items():
+        if grp in insn.groups:
+            reasons.append(f"group:{name}")
+    return reasons
+
+
+def scan(path):
+    """Print each forbidden instruction found in *path*; return True if any."""
+    # Capstone x86-64
+    md = Cs(CS_ARCH_X86, CS_MODE_64)
+    md.detail = True  # needed to read insn.groups
+    md.skipdata = True  # continue past bytes that do not decode
+
+    any_bad = False
+    with open(path, "rb") as f:
+        # pyelftools reads section data from f on demand, so scan inside the with.
+        for sec in iter_exec_sections(ELFFile(f)):
+            for insn in md.disasm(sec.data(), sec["sh_addr"]):
+                # Skip SKIPDATA pseudo-instructions (data, not code)
+                if insn.id == 0:
+                    continue
+                reasons = forbidden_reasons(insn)
+                if reasons:
+                    any_bad = True
+                    # print minimal but actionable info
+                    print(f"{path}:{sec.name}:0x{insn.address:x}: {insn.mnemonic} {insn.op_str} ({', '.join(reasons)})")
+    return any_bad
+
+
+def main(path):
+    """Scan *path* and exit with the status described in the module docstring."""
+    try:
+        any_bad = scan(path)
+    except (OSError, ELFError) as err:
+        print(f"{path}: cannot scan: {err}", file=sys.stderr)
+        sys.exit(2)
+    sys.exit(1 if any_bad else 0)
+
+
+if __name__ == "__main__":
+    if len(sys.argv) != 2:
+        print(f"usage: {sys.argv[0]} /path/to/binary", file=sys.stderr)
+        sys.exit(2)
+    main(sys.argv[1])
diff --git a/extract-brig.sh b/extract-brig.sh
new file mode 100755
index 00000000000..47fff3250f9
--- /dev/null
+++ b/extract-brig.sh
@@ -0,0 +1,31 @@
+#!/usr/bin/env bash
+# Extract brig binary from docker image
+
+set -euo pipefail
+
+VERSION="5.25.0"
+IMAGE="quay.io/wire/brig:${VERSION}"
+
+echo "Extracting brig binary from ${IMAGE}..."
+
+# Pull the image
+docker pull "${IMAGE}"
+
+# Get the entrypoint to find the brig path
+ENTRYPOINT=$(docker inspect "${IMAGE}" | jq -r '.[0].Config.Entrypoint | .[-1]')
+echo "Brig binary location: ${ENTRYPOINT}"
+
+# Create a container from the image
+CONTAINER=$(docker create "${IMAGE}")
+
+# Export the container filesystem and extract just the brig binary
+echo "Extracting from container..."
+TEMP_DIR=$(mktemp -d)
+docker export "${CONTAINER}" | tar -x -C "${TEMP_DIR}" "${ENTRYPOINT#/}"
+cp "${TEMP_DIR}${ENTRYPOINT}" "./brig-${VERSION}"
+rm -rf "${TEMP_DIR}"
+
+# Clean up the container
+docker rm "${CONTAINER}"
+
+echo "Extracted brig binary to: ./brig-${VERSION}"
diff --git a/extract-memchr-test.sh b/extract-memchr-test.sh
new file mode 100755
index 00000000000..30fabecdb74
--- /dev/null
+++ b/extract-memchr-test.sh
@@ -0,0 +1,45 @@
+#!/usr/bin/env bash
+# Extract memchr-test binary from docker image
+
+set -euo pipefail
+
+if [ $# -eq 0 ]; then
+  echo "Usage: $0 <image>" >&2
+  echo "Example: $0 quay.io/wire/memchr-test:latest" >&2
+  exit 1
+fi
+
+IMAGE="$1"
+VERSION="0.1.0"
+OUTPUT="./memchr-test-${VERSION}"
+
+echo "Extracting memchr-test binary from ${IMAGE}..."
+
+# Get the entrypoint to find the memchr-test path
+ENTRYPOINT=$(docker inspect "${IMAGE}" | jq -r '.[0].Config.Entrypoint | .[-1]')
+if [ -z "${ENTRYPOINT}" ] || [ "${ENTRYPOINT}" = "null" ]; then
+  echo "Error: ${IMAGE} has no entrypoint to locate the binary" >&2
+  exit 1
+fi
+echo "memchr-test binary location: ${ENTRYPOINT}"
+
+# Remove the container and scratch directory however the script exits
+CONTAINER=""
+TEMP_DIR=""
+cleanup() {
+  if [ -n "${CONTAINER}" ]; then docker rm "${CONTAINER}" > /dev/null 2>&1 || true; fi
+  if [ -n "${TEMP_DIR}" ]; then rm -rf "${TEMP_DIR}"; fi
+}
+trap cleanup EXIT
+
+# Create a container from the image
+CONTAINER=$(docker create "${IMAGE}")
+
+# Export the container filesystem and extract just the memchr-test binary
+echo "Extracting from container..."
+TEMP_DIR=$(mktemp -d)
+docker export "${CONTAINER}" | tar -x -C "${TEMP_DIR}" "${ENTRYPOINT#/}"
+cp "${TEMP_DIR}${ENTRYPOINT}" "${OUTPUT}"
+chmod +x "${OUTPUT}"
+
+echo "Extracted memchr-test binary to: ${OUTPUT}"
diff --git a/integration/test/Testlib/ModService.hs b/integration/test/Testlib/ModService.hs
index 3939c17164f..34f859cd8c2 100644
--- a/integration/test/Testlib/ModService.hs
+++ b/integration/test/Testlib/ModService.hs
@@ -508,7 +508,17 @@ withProcess resource overrides service = do
         _ -> do
           config <- getConfig
           tempFile <- writeTempFile "/tmp" (execName <> "-" <> domain <> "-" <> ".yaml") (cs $ Yaml.encode config)
-          (_, Just stdoutHdl, Just stderrHdl, ph) <- createProcess (proc exe ["-c", tempFile]) {cwd = cwd, std_out = CreatePipe, std_err = CreatePipe}
+          -- Use extracted production binary for brig (debugging hack)
+          let brigBinary = case service of
+                Brig -> case cwd of
+                  Nothing -> "./brig-5.25.0"
+                  Just _ -> "../../brig-5.25.0"
+                _ -> exe
+          -- Wrap brig with qemu to enforce x86-64-v3 compatibility (explicitly disable AVX-512)
+          let (exePath, args) = case (service, execName) of
+                (Brig, "brig") -> ("qemu-x86_64", ["-cpu", "EPYC,-xsavec,-misalignsse,-topoext,-avx512f,-avx512dq,-avx512cd,-avx512bw,-avx512vl", brigBinary, "-c", tempFile])
+                _ -> (brigBinary, ["-c", tempFile])
+          (_, Just stdoutHdl, Just stderrHdl, ph) <- createProcess (proc exePath args) {cwd = cwd, std_out = CreatePipe, std_err = CreatePipe}
           let colorize = fromMaybe id (lookup execName processColors)
           void $ forkIO $ logToConsoleDebug (Just stdOut) colorize prefix stdoutHdl
           void $ forkIO $ logToConsoleDebug (Just stdErr) colorize prefix stderrHdl
diff --git a/nix/local-haskell-packages.nix b/nix/local-haskell-packages.nix
index 2e065b6d403..0fa685756c4 100644
--- a/nix/local-haskell-packages.nix
+++ b/nix/local-haskell-packages.nix
@@ -41,6 +41,7 @@
   federator = hself.callPackage ../services/federator/default.nix { inherit gitignoreSource; };
   galley = hself.callPackage ../services/galley/default.nix { inherit gitignoreSource; };
   gundeck = hself.callPackage ../services/gundeck/default.nix { inherit gitignoreSource; };
+  memchr-test = hself.callPackage ../services/memchr-test/default.nix { inherit gitignoreSource; };
   proxy = hself.callPackage ../services/proxy/default.nix { inherit gitignoreSource; };
   spar = hself.callPackage ../services/spar/default.nix { inherit gitignoreSource; };
   wire-server-enterprise = hself.callPackage ../services/wire-server-enterprise/default.nix { inherit gitignoreSource; };
diff --git a/nix/wire-server.nix b/nix/wire-server.nix
index dd2b183877f..92058c7f568 100644
--- a/nix/wire-server.nix
+++ b/nix/wire-server.nix
@@ -90,6 +90,7 @@ let
     team-info = [ "team-info" ];
     wire-server-enterprise = [ "wire-server-enterprise" ];
     migrate-features = [ "migrate-features" ];
+    memchr-test = [ "memchr-test" ];
   };
 
   inherit (lib) attrsets;
@@ -516,10 +517,12 @@ in
       (ps: with ps; [
         black
         bokeh
+        capstone
         flake8
         ipdb
         ipython
         protobuf
+        pyelftools
         pylint
         pyyaml
         requests
@@ -545,6 +548,7 @@ in
       # linux-only, not strictly required tools
       pkgs.docker-compose
       (pkgs.telepresence.override { pythonPackages = pkgs.python310Packages; })
+      pkgs.qemu # for testing x86-64-v3 compatibility
     ];
   };
 
diff --git a/scan-all-binaries-in-image.sh b/scan-all-binaries-in-image.sh
new file mode 100755
index 00000000000..22b70fc0d8c
--- /dev/null
+++ b/scan-all-binaries-in-image.sh
@@ -0,0 +1,125 @@
+#!/usr/bin/env bash
+# scan-all-binaries-in-image.sh
+# Scans all x86-64 ELF binaries in a Docker image for forbidden instructions
+
+set -euo pipefail
+
+if [ $# -eq 0 ]; then
+  echo "Usage: $0 <image>" >&2
+  echo "Example: $0 quay.io/wire/brig:latest" >&2
+  exit 1
+fi
+
+IMAGE="$1"
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+CAPSTONE_SCRIPT="$SCRIPT_DIR/capstone-scan.py"
+
+if [ ! -f "$CAPSTONE_SCRIPT" ]; then
+  echo "Error: capstone-scan.py not found at $CAPSTONE_SCRIPT" >&2
+  exit 1
+fi
+
+echo "========================================="
+echo "Scanning all binaries in: $IMAGE"
+echo "========================================="
+
+# Pull the image
+echo "Pulling image..."
+docker pull "$IMAGE"
+
+# Remove the container and extracted filesystem however the script exits
+CONTAINER=""
+TEMP_DIR=""
+cleanup() {
+  if [ -n "$CONTAINER" ]; then docker rm "$CONTAINER" > /dev/null 2>&1 || true; fi
+  if [ -n "$TEMP_DIR" ]; then
+    # Make the extracted tree writable first so rm -rf can delete it
+    chmod -R +w "$TEMP_DIR" 2>/dev/null || true
+    rm -rf "$TEMP_DIR"
+  fi
+}
+trap cleanup EXIT
+
+# Create container and extract filesystem
+CONTAINER=$(docker create "$IMAGE")
+TEMP_DIR=$(mktemp -d)
+
+echo "Extracting filesystem..."
+docker export "$CONTAINER" | tar -C "$TEMP_DIR" -x
+
+# Find all ELF binaries. Paths are read NUL-delimited and `file` is asked for
+# its brief output so unusual file names cannot confuse the match, and an
+# image without any x86-64 binary yields an empty list instead of aborting.
+echo "Finding ELF binaries..."
+BINARIES=()
+BINARY_COUNT=0
+while IFS= read -r -d '' candidate; do
+  case "$(file -b "$candidate")" in
+    *ELF*x86-64*)
+      BINARIES+=("$candidate")
+      BINARY_COUNT=$((BINARY_COUNT + 1))
+      ;;
+  esac
+done < <(find "$TEMP_DIR" -type f -print0)
+
+echo "Found $BINARY_COUNT x86-64 ELF binaries to scan"
+echo ""
+
+# Scan each binary
+FAILED_COUNT=0
+ERROR_COUNT=0
+PASSED_COUNT=0
+FAILED_BINARIES=()
+
+for ((i = 0; i < BINARY_COUNT; i++)); do
+  binary="${BINARIES[i]}"
+  rel_path="${binary#"$TEMP_DIR"}"
+  echo "[$((i + 1))/$BINARY_COUNT] Scanning: $rel_path"
+
+  # capstone-scan.py exits 1 for forbidden instructions; any other non-zero
+  # status is reported separately as a scanner error
+  status=0
+  "$CAPSTONE_SCRIPT" "$binary" 2>&1 || status=$?
+  case "$status" in
+    0)
+      echo " ✓ OK"
+      PASSED_COUNT=$((PASSED_COUNT + 1))
+      ;;
+    1)
+      echo " ✗ FAILED - contains forbidden instructions!"
+      FAILED_COUNT=$((FAILED_COUNT + 1))
+      FAILED_BINARIES+=("$rel_path")
+      ;;
+    *)
+      echo " ✗ ERROR - scanner exited with status $status, binary not verified"
+      ERROR_COUNT=$((ERROR_COUNT + 1))
+      FAILED_BINARIES+=("$rel_path (scanner error)")
+      ;;
+  esac
+  echo ""
+done
+
+# Summary
+echo "========================================="
+echo "Summary for: $IMAGE"
+echo "========================================="
+echo "Total binaries scanned: $BINARY_COUNT"
+echo "Passed: $PASSED_COUNT"
+echo "Failed: $FAILED_COUNT"
+echo "Scanner errors: $ERROR_COUNT"
+
+if [ "$FAILED_COUNT" -gt 0 ] || [ "$ERROR_COUNT" -gt 0 ]; then
+  echo ""
+  echo "Failed binaries:"
+  for failed_binary in "${FAILED_BINARIES[@]}"; do
+    echo " - $failed_binary"
+  done
+  echo ""
+  echo "❌ Image contains binaries with forbidden instructions or that could not be scanned!"
+  exit 1
+else
+  echo ""
+  echo "✅ All binaries are compliant!"
+  exit 0
+fi
diff --git a/services/memchr-test/default.nix b/services/memchr-test/default.nix
new file mode 100644
index 00000000000..c737a7dc86a
--- /dev/null
+++ b/services/memchr-test/default.nix
@@ -0,0 +1,23 @@
+# WARNING: GENERATED FILE, DO NOT EDIT.
+# This file is generated by running hack/bin/generate-local-nix-packages.sh and
+# must be regenerated whenever local packages are added or removed, or
+# dependencies are added or removed.
+{ mkDerivation
+, base
+, bytestring
+, gitignoreSource
+, lib
+, primitive
+, text
+}:
+mkDerivation {
+  pname = "memchr-test";
+  version = "0.1.0";
+  src = gitignoreSource ./.;
+  isLibrary = false;
+  isExecutable = true;
+  executableHaskellDepends = [ base bytestring primitive text ];
+  description = "Minimal test service for memchr";
+  license = lib.licenses.agpl3Only;
+  mainProgram = "memchr-test";
+}
diff --git a/services/memchr-test/exec/Main.hs b/services/memchr-test/exec/Main.hs
new file mode 100644
index 00000000000..081e9e48c37
--- /dev/null
+++ b/services/memchr-test/exec/Main.hs
@@ -0,0 +1,34 @@
+module Main (main) where
+
+import Data.Text qualified as T
+import Data.Text.Internal.Search qualified as TS
+import System.Environment (getArgs)
+import System.Exit (die)
+import System.IO (hPrint, stderr)
+import Prelude
+
+-- Call path to AVX512 instructions:
+-- main -> TS.indices -> scanOne -> _hs_text_memchr -> AVX512 code
+--
+-- Not all Strings hit _hs_text_memchr. A good input for this is e.g. "AWAA"
+main :: IO ()
+main = do
+  args <- getArgs
+  input <- case args of
+    (str : _) -> pure $ T.pack str
+    -- A missing argument is user error, not a bug: report it and exit non-zero
+    [] -> die "Please provide a string argument"
+
+  -- This calls TS.indices which triggers the call chain to AVX512
+  -- Force strict evaluation of the entire list
+  -- Use a multi-character pattern to trigger the memchr path
+  let result = TS.indices (T.pack "WA") input
+      !len = length result -- Force evaluation of the list
+      !first = case result of
+        [] -> -1
+        (x : _) -> x
+
+  hPrint stderr (T.length input)
+  hPrint stderr len
+  print first
+  print result
diff --git a/services/memchr-test/memchr-test.cabal b/services/memchr-test/memchr-test.cabal
new file mode 100644
index 00000000000..cee136c616a
--- /dev/null
+++ b/services/memchr-test/memchr-test.cabal
@@ -0,0 +1,38 @@
+cabal-version: 1.12
+name:          memchr-test
+version:       0.1.0
+synopsis:      Minimal test service for memchr
+description:
+  A minimal executable that reaches text's _hs_text_memchr via Data.Text.Internal.Search.indices
+
+category:      Test
+author:        Wire Swiss GmbH
+maintainer:    Wire Swiss GmbH
+copyright:     (c) 2026 Wire Swiss GmbH
+license:       AGPL-3
+build-type:    Simple
+
+flag static
+  description: Enable static linking
+  manual:      True
+  default:     False
+
+executable memchr-test
+  main-is:            exec/Main.hs
+  default-extensions:
+    MagicHash
+    NoImplicitPrelude
+    OverloadedStrings
+    UnboxedTuples
+
+  ghc-options:        -O0 -Wall -threaded -rtsopts -with-rtsopts=-N
+  build-depends:
+      base        >=4.6 && <5
+    , bytestring
+    , primitive
+    , text        >=1.1
+
+  if flag(static)
+    ld-options: -static
+
+  default-language:   GHC2021