From e6495672a58c48674eb8ec30cdc7f37ce7d57776 Mon Sep 17 00:00:00 2001 From: rina Date: Thu, 17 Jul 2025 15:59:38 +1000 Subject: [PATCH 1/5] add docker-helper.sh to lift examples using pac-nix docker image this introduces the src/test/make/docker-helper.sh script which is a (hopefully) developer-friendly method for lifting examples in a Docker container provided by [pac-nix][]. to ensure reproducibility, this uses an x86_64-linux docker container with cross-compiler provided by nix. for more details, also see https://github.com/UQ-PAC/BASIL/pull/288. the usage is comprehensively documented in [nix-docker-build readme]. the general flow is: 1. activate the docker-helper.sh environment: `eval $(docker-helper.sh env)`. (you will need to re-run this if you close your terminal) 2. pull the docker image: `docker-helper.sh pull` 3. start the docker container: `docker-helper.sh start` 4. (optional) clean the directory you want to lift: `make SUBDIRS=extraspec_incorrect/malloc_memcpy_strlen_memset_free clean` 5. compile and lift the directory: `make SUBDIRS=extraspec_incorrect/malloc_memcpy_strlen_memset_free` 6. commit the results (eventually, we want to move the binaries into another repo. but until then, committing is fine.) [pac-nix]: https://github.com/katrinafyi/pac-nix/ [nix-docker-build readme]: https://github.com/UQ-PAC/BASIL/blob/nix-docker-build/src/test/readme.md --- .gitattributes | 8 ++ src/test/Makefile | 64 +++++++++-- src/test/make/bap-normalise.py | 113 ++++++++++++++++++++ src/test/make/docker-flake.txt | 1 + src/test/make/docker-helper.sh | 172 ++++++++++++++++++++++++++++++ src/test/make/gcc.mk | 1 + src/test/make/gcc_O2.mk | 2 +- src/test/make/gcc_pic.mk | 2 +- src/test/make/lift-directories.mk | 67 +++++++++--- src/test/make/lift.mk | 93 ++++++++++++---- 10 files changed, 482 insertions(+), 41 deletions(-) create mode 100755 src/test/make/bap-normalise.py create mode 100644 src/test/make/docker-flake.txt create mode 100755 src/test/make/docker-helper.sh diff --git a/.gitattributes b/.gitattributes index 3d7ef3bfa1..d5292c05dc 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1,3 +1,11 @@ *.mill linguist-language=Scala mill linguist-generated mill.bat linguist-generated + +src/test/compiled.url.txt text eol=lf +src/test/make/docker-flake.txt text eol=lf + +Makefile text eol=lf +*.sh text eol=lf +*.mk text eol=lf +*.md5sum text eol=lf linguist-generated diff --git a/src/test/Makefile b/src/test/Makefile index 63c6d5fb5a..5d2f7dc0a7 100644 --- a/src/test/Makefile +++ b/src/test/Makefile @@ -1,12 +1,60 @@ -TARGETSS := all verify clean cleanall cleanlift recompile json cleanjson cleangts gts +TARGETSS := all verify repro-stash repro-check md5sum-check md5sum-update clean cleanall cleanlift recompile json cleanjson cleangts gts -SUBTARGETS = $(wildcard correct/*/ incorrect/*/) -.PHONY : $(TARGETSS) $(SUBTARGETS) correct incorrect +# subdirectories of src/test. to be entered into by this makefile. +DIRS := correct incorrect extraspec_correct extraspec_incorrect indirect_calls memory_regions procedure_summaries +# dirs with different directory structure: dsa, irreducible_loops -$(TARGETSS): $(SUBTARGETS) +# non-test dirs: make, scala, unimplemented -correct: $(realpath $(wildcard correct/*)) -incorrect: $(realpath $(wildcard incorrect/*)) +# in case the user specfies DIRS, make sure all dirs exist. +$(foreach d, $(DIRS), \ + $(if $(wildcard $(d)/.), \ + , \ + $(error user error: directory "$(d)" in DIRS variable does not exist))) + +SUBDIRS = $(wildcard $(addsuffix /*/,$(DIRS))) +.PHONY : $(TARGETSS) $(SUBDIRS) $(DIRS) + +# through some unpleasantness, this lets the user specify either DIRS or SUBDIRS +# on the command line, and the make operation will be narrowed to that directory +$(TARGETSS): $(SUBDIRS) + +$(SUBDIRS): + $(MAKE) -C $@ -f $(realpath ./make/lift-directories.mk) $(MAKECMDGOALS) + +# concats md5sums files in subdirectories into a compiled.md5sum. +# check with `md5sum -c compiled.md5sum` in src/test. +.PHONY: compiled.md5sum +compiled.md5sum: + find $(DIRS) -name '*.md5sum' -exec cat '{}' + | sort -k2 > compiled.md5sum + +TARBALL := compiled.tar.zst + +$(TARBALL) docker-contents.txt &: compiled.md5sum + set -u; $$DOCKER_CMD hash > docker-contents.txt # before compessing, make sure docker-contents.txt is up to date. + md5sum --quiet -c compiled.md5sum # before compressing, make sure our files match expected hashes. + list=`mktemp`; cut -d' ' -f3 compiled.md5sum > $$list && tar caf $(TARBALL) -T $$list && rm $$list + sha1sum $(TARBALL) + +.PHONY: extract +extract: + # log URL and expected hash + { head -n1 compiled.url.txt; tail -n1 compiled.url.txt; } | cat -v + # check existing file, otherwise download fresh copy. + { tail -n1 compiled.url.txt | sha1sum -c - ; } \ + || curl "$$(head -n1 compiled.url.txt)" -o $(TARBALL) + # check file type. + -file $(TARBALL) + # validate the hash, otherwise remove the incorrect file and abort. + { tail -n1 compiled.url.txt | sha1sum -c - ; } || { rm -v $(TARBALL); exit 1; } + tar xf $(TARBALL) --keep-old-files --touch + md5sum --quiet -c compiled.md5sum # check that extracted files match expected checksums + +.PHONY: push +push: + tmp=`mktemp -d` && \ + git clone git@github.com:UQ-PAC/basil-tests.git $$tmp --single-branch --branch basil-src-test && \ + cd $$tmp && \ + + rm -rf $$tmp -$(SUBTARGETS): - -$(MAKE) -C $@ -f $(realpath ./make/lift-directories.mk) $(MAKECMDGOALS) diff --git a/src/test/make/bap-normalise.py b/src/test/make/bap-normalise.py new file mode 100755 index 0000000000..a7459de7a8 --- /dev/null +++ b/src/test/make/bap-normalise.py @@ -0,0 +1,113 @@ +#!/usr/bin/env python3 +# vim: ts=2 sts=2 et sw=2 + +""" +BAP .adt / .bir file normaliser: + +usage: +bap-normalise.py ADT-FILE BIR-FILE + +both arguments are required in that order! +files will be modified in-place. + ++00000540: main_argv :: in out u64 = R1 ++00000541: main_result :: out u32 = low:32[R0] + + 00000199: + 0000019c: R1 := 0x41F000 +@@ -187,7 +187,7 @@ + 000001c8: call R30 with noreturn + + 00000515: sub register_tm_clones(register_tm_clones_result) +-00000529: register_tm_clones_result :: out u32 = low:32[R0] ++00000542: register_tm_clones_result :: out u32 = low:32[R0] + + 0000028e: + 00000291: R0 := 0x420000 +@@ -199,13 +199,13 @@ + 000002b6: R1 := R2 + (R1 ~>> 3) + 000002bc: R1 := extend:64[63:1[R1]] + 000002c2: when R1 = 0 goto %000002c0 +-00000516: goto %00000337 ++0000052f: goto %00000337 + + 00000337: + 0000033a: R2 := 0x41F000 + 00000341: R2 := mem[R2 + 0xFF8, el]:u64 + 00000346: when R2 = 0 goto %000002c0 +-00000517: goto %0000034a ++00000530: goto %0000034a + +""" + +import sys +import re + +adt_file = sys.argv[1] +bir_file = sys.argv[2] +assert len(sys.argv) == 3 + +string_re = re.compile(rb'''"((?:[^"\\]|\\.)*)"''') +hexstring_re = re.compile(rb'''"%([\da-fA-F]{8})"''') +tid_re = re.compile(rb'''Tid\(([_\d]+),''') +bir_re = re.compile(rb'''(?:^([\da-fA-F]{8}):)|(?: %([\da-fA-F]{8}))''', re.MULTILINE) + +with open(adt_file, 'rb') as f: + adt = f.read() + +tids: dict[int, int] = {} # map of old tid to their first position in adt +for match in re.finditer(tid_re, adt): + tid = int(match[1].replace(b'_', b'')) + if tid not in tids: + tids[tid] = match.start() + +assert tids, f'adt file {adt_file} has no Tid() values??' + +keys = list(tids.keys()) +keys.sort(key=tids.__getitem__) + +new_tids = {tid: 4*i for i, tid in enumerate(keys)} + +# .adt file + +def sub_adt(m: re.Match[bytes]) -> bytes: + tid = int(m[1].replace(b'_', b'')) + new = new_tids[tid] + return f'Tid({new:_},'.encode('ascii') +def sub_adt_strings(m: re.Match[bytes]) -> bytes: + tid = int(m[1], 16) + new = new_tids[tid] + return f'"%{new:08x}"'.encode('ascii') + +new_adt = re.sub(tid_re, sub_adt, adt) +new_adt = re.sub(hexstring_re, sub_adt_strings, new_adt) + +# .bir file + +# print(new_tids) +bir_seen = set() +def sub_bir(m: re.Match[bytes]) -> bytes: + old = m[1] or m[2] + tid = int(old, 16) + bir_seen.add(tid) + assert tid in new_tids, f"{m}" + new = new_tids[tid] + return m[0].replace(old, f'{new:08x}'.encode('ascii')) + +with open(bir_file, 'rb') as f: + bir = f.read() + +new_bir = re.sub(bir_re, sub_bir, bir) +adt_seen = set(new_tids) +assert bir_seen == adt_seen, f'not equal!\nbir - adt =\n{bir_seen - adt_seen}\nadt - bir =\n{adt_seen - bir_seen}' + +assert new_adt != adt +assert new_bir != bir + +# writeback only if both are successful + +with open(bir_file, 'wb') as f: + f.write(new_bir) + +with open(adt_file, 'wb') as f: + f.write(new_adt) diff --git a/src/test/make/docker-flake.txt b/src/test/make/docker-flake.txt new file mode 100644 index 0000000000..d7cfe8b6ff --- /dev/null +++ b/src/test/make/docker-flake.txt @@ -0,0 +1 @@ +github:katrinafyi/pac-nix/569afdf78558de82c24d25e12680157c3b0aa3df#basil-tools-docker diff --git a/src/test/make/docker-helper.sh b/src/test/make/docker-helper.sh new file mode 100755 index 0000000000..7d9ebcd8a6 --- /dev/null +++ b/src/test/make/docker-helper.sh @@ -0,0 +1,172 @@ +#!/usr/bin/env bash +set -ue + +if [[ -z "${GIT_ROOT:-}" ]] && command -v git &>/dev/null; then + GIT_ROOT=$(git rev-parse --show-toplevel) +fi +: ${GIT_ROOT} +DIR=$(realpath --relative-to "$GIT_ROOT" .) + +: ${DOCKER:=podman} +: ${DOCKER_PLATFORM:=--platform linux\/amd64} +: ${DOCKER_USER:=root} +: ${DOCKER_IMAGE:=ghcr.io/uq-pac/basil-tools-docker} + +if [[ $# -lt 1 ]] || [[ "$1" == --help ]]; then + echo "usage: $(basename $0) (pull | push | build | start | stop | shell | hash | env [--unset] | COMMAND...)" + ! [[ $# -lt 1 ]] + exit +fi + +DOCKER_CMD="$(realpath $0)" + +if [[ -z "${DOCKER_FLAKE:-}" ]] && [[ -r "$(dirname $DOCKER_CMD)/docker-flake.txt" ]]; then + DOCKER_FLAKE="$(cat $(dirname $DOCKER_CMD)/docker-flake.txt)" +fi + +: $DOCKER_FLAKE + +# create unique names depending on the flake reference, to ensure the correct container +# is used. +# unique names depend only on DOCKER_FLAKE, allowing them to be computed without nix. +commit=$(printf '%s' "$DOCKER_FLAKE" | grep --only-matching -E '[0-9a-fA-F]{40}' | head -c8) +flake_hash=flake-$(printf '%s' "$DOCKER_FLAKE" | md5sum | cut -d' ' -f1 | head -c4) + +if [[ -z "${DOCKER_TAG:-}" ]]; then + DOCKER_TAG="$flake_hash-$commit" +fi + +unique_image="$DOCKER_IMAGE:$DOCKER_TAG" +unique_container="container-$DOCKER_TAG" + +# this allows the env subcommand to output syntax compatible with multiple shells +shell=$(basename $SHELL) +if [[ $shell == fish ]]; then + unset='set --erase' + unalias='functions --erase' + eval='(' +else + unset=unset + unalias=unalias + eval='$(' +fi + + +if [[ "$1" == pull ]]; then + # pulls the unique image from the registry + set -x + exec $DOCKER pull $DOCKER_PLATFORM "$unique_image" + +elif [[ "$1" == push ]]; then + # pushes the unique image to the registry. image must already exist locally. + set -x + exec $DOCKER push "$unique_image" + +elif [[ "$1" == build ]]; then + # builds the docker image for running tools. + # safe to re-run. if docker image is already up-to-date, should be reasonably fast. + nix build "$DOCKER_FLAKE" --no-link + nix build "$DOCKER_FLAKE.conf" --no-link + conf=$(nix build "$DOCKER_FLAKE.conf" --no-link --print-out-paths) + tag=$(nix eval --expr "with builtins; (fromJSON (unsafeDiscardStringContext (readFile $conf))).repo_tag" --impure --raw) + if ! [[ "$tag" == "$DOCKER_IMAGE":* ]]; then + printf '%s %s %s.\n' \ + "ERROR: docker image names do not match!" \ + "nix flake will build '$tag', but" \ + "DOCKER_IMAGE is '$DOCKER_IMAGE'" >&2 + exit 1 + fi + set -x + $(nix build "$DOCKER_FLAKE" --no-link --print-out-paths) | "$DOCKER" image load + $DOCKER image tag "$tag" $unique_image + exit + +elif [[ "$1" == start ]]; then + # starts an instance of the docker image. + set -x + exec $DOCKER run $DOCKER_PLATFORM -v"$GIT_ROOT:$GIT_ROOT" --rm -td --user $DOCKER_USER --name $unique_container $unique_image + +elif [[ "$1" == stop ]]; then + # stops the instance of the docker image. + set -x + exec $DOCKER stop -t 1 $unique_container + # since --rm is given to `docker run`, this will also remove the container. + +elif [[ "$1" == shell ]]; then + # enters an interactive shell within the container. + set -x + exec $DOCKER exec -it --user $DOCKER_USER -w "$GIT_ROOT/$DIR" -eshell=1 $unique_container /usr/bin/_exec bash + +elif [[ "$1" == hash ]]; then + # outputs information about the docker image's version to stdout. + echo "$DOCKER_FLAKE" + echo + exec "$DOCKER_CMD" bash -c 'ls -1 /nix/store | sort -k1.33' # sort /nix/store contents by name, not hash + +elif [[ "$1" == env ]]; then + # outputs commands to set the environment to stdout. + # when passed to `eval`, these commands should prepare the shell for running + # basil tests through docker. + + # if --unset is used, removes all definitions + isunset=$([[ $# -ge 2 ]] && [[ "$2" == --unset ]] && echo true || echo false) + # if --reset is used, removes all definitions, then re-adds them based on defaults + isreset=$([[ $# -ge 2 ]] && [[ "$2" == --reset ]] && echo true || echo false) + + if $isreset; then + isunset=true + fi + + function echoexport() { + if $isunset; then + echo echo "$unset" "$1" ';' + echo "$unset" "$1" ';' + return + fi + printf 'echo "%s = %s";\n' "$1" "$2" + printf 'export %s="%s";\n' "$1" "$2" + } + + echoexport USE_DOCKER "1" + echoexport DOCKER_FLAKE "$DOCKER_FLAKE" + echoexport DOCKER_IMAGE "$DOCKER_IMAGE" + echoexport DOCKER_TAG "$DOCKER_TAG" + echoexport DOCKER_PLATFORM "$DOCKER_PLATFORM" + echoexport DOCKER "$DOCKER" + echoexport DOCKER_USER "$DOCKER_USER" + echoexport DOCKER_CMD "$DOCKER_CMD" + echoexport GIT_ROOT "$GIT_ROOT" + echo 'echo;' + echoexport GCC "$DOCKER_CMD aarch64-unknown-linux-gnu-gcc" + echoexport CLANG "$DOCKER_CMD aarch64-unknown-linux-gnu-clang" + echoexport READELF "$DOCKER_CMD aarch64-unknown-linux-gnu-readelf" + echoexport BAP "$DOCKER_CMD bap" + echoexport DDISASM "$DOCKER_CMD ddisasm" + echoexport PROTO_JSON "$DOCKER_CMD proto-json.py" + # echoexport PROTO_JSON "/home/rina/progs/gtirb-semantics/scripts/proto-json.py" + echoexport DEBUG_GTS "$DOCKER_CMD debug-gts.py" + echoexport GTIRB_SEMANTICS "$DOCKER_CMD gtirb-semantics" + echo 'echo;' + if $isunset; then + echo "echo $unalias docker-helper.sh;" + echo "$unalias docker-helper.sh;" + else + echo "echo alias docker-helper.sh = '$DOCKER_CMD';" + echo "alias 'docker-helper.sh=$DOCKER_CMD';" + fi + + if $isreset; then + echo "eval $eval$DOCKER_CMD env);" + fi + exit +fi + +if [[ -n "${NIX_BUILD_TOP:-}" ]]; then + set -x + # if already inside a Nix shell, simply execute + exec /usr/bin/_exec "$@" +else + set -x + # for other commands, execute within the container. + exec $DOCKER exec --user $DOCKER_USER -w "$GIT_ROOT/$DIR" $unique_container /usr/bin/_exec "$@" +fi diff --git a/src/test/make/gcc.mk b/src/test/make/gcc.mk index 4f9fdeaae0..74e2190e55 100644 --- a/src/test/make/gcc.mk +++ b/src/test/make/gcc.mk @@ -1,2 +1,3 @@ CC=$(GCC) +CFLAGS += -pie include $(GIT_ROOT)/src/test/make/lift.mk diff --git a/src/test/make/gcc_O2.mk b/src/test/make/gcc_O2.mk index 72f49363f1..bfe3dfd196 100644 --- a/src/test/make/gcc_O2.mk +++ b/src/test/make/gcc_O2.mk @@ -1,3 +1,3 @@ CC=$(GCC) -CFLAGS += -O2 +CFLAGS += -pie -O2 include $(GIT_ROOT)/src/test/make/lift.mk diff --git a/src/test/make/gcc_pic.mk b/src/test/make/gcc_pic.mk index b107b722d0..6d2b6e01d5 100644 --- a/src/test/make/gcc_pic.mk +++ b/src/test/make/gcc_pic.mk @@ -1,3 +1,3 @@ CC=$(GCC) -CFLAGS += -fpic +CFLAGS += -pie -fpic include $(GIT_ROOT)/src/test/make/lift.mk diff --git a/src/test/make/lift-directories.mk b/src/test/make/lift-directories.mk index cbb55a3005..423460890a 100644 --- a/src/test/make/lift-directories.mk +++ b/src/test/make/lift-directories.mk @@ -2,11 +2,32 @@ # Run from the directory basil/src/test/*/test_case/ +# - lift-directories.mk: sets NAME +# - config.mk: sets ENABLED_COMPILERS +# - exec: gcc_pic.mk: sets CC / CFLAGS +# - lift.mk: sets CONFIG to compiler variant + +NAME := $(notdir $(shell pwd)) + +COMMON_ARTEFACTS := $(NAME).relf +BAP_ARTEFACTS := $(NAME).adt $(NAME).bir +GTIRB_ARTEFACTS := $(NAME).gts + +ALL_ARTEFACTS := $(BAP_ARTEFACTS) $(COMMON_ARTEFACTS) $(GTIRB_ARTEFACTS) # - means continue if it doesnt exist -include ./config.mk -NAME=$(notdir $(shell pwd)) +LIFT_ARTEFACTS ?= $(BAP_ARTEFACTS) $(COMMON_ARTEFACTS) $(GTIRB_ARTEFACTS) + GIT_ROOT?=$(realpath ../../../../) +BUILD_DIR ?= $(shell realpath --relative-to $(GIT_ROOT) .) +MAKE_DIR ?= $(GIT_ROOT)/src/test/make + +ifeq ($(USE_DOCKER),1) + ENSURE_DOCKER := true "using docker" && +else + ENSURE_DOCKER := echo "this command should be run within docker" >&2 && false && +endif #CFLAGS=-fno-pic -fno-plt TARGET=aarch64-linux-gnu @@ -14,28 +35,50 @@ GCC ?= aarch64-linux-gnu-gcc CLANG ?= clang-15 -target $(TARGET) CC ?= $(GCC) -BAP?=bap +BAP ?= bap READELF ?= aarch64-linux-gnu-readelf + +DDISASM ?= ddisasm +PROTO_JSON ?= proto-json.py +DEBUG_GTS ?= debug-gts.py +GTIRB_SEMANTICS ?= gtirb-semantics + BASIL=$(GIT_ROOT)/target/scala-3.3.1/wptool-boogie-assembly-0.0.1.jar -C_SOURCE ?=$(realpath $(wildcard *.c)) -SPEC ?=$(realpath $(wildcard *.spec)) -EXTRA_SPEC ?=$(realpath $(wildcard *.bpl)) -BASIL_FLAGS ?= +# paths below are relative to lift.mk's compilation_variant directory. +# note, wildcard is relative to test_case directory. +C_SOURCE ?=$(addprefix ../,$(wildcard *.c)) +SPEC ?=$(addprefix ../,$(wildcard *.spec)) +EXTRA_SPEC ?=$(addprefix ../,$(wildcard *.bpl)) +BASIL_FLAGS ?= #BOOGIE_FLAGS=/proverOpt:O:smt.array.extensional=false BOOGIE_FLAGS ?= /useArrayAxioms -LIFT_ARTEFACTS=$(NAME).adt $(NAME).bir $(NAME).relf $(NAME).gts - ENABLED_COMPILERS ?= clang clang_O2 clang_pic gcc gcc_O2 gcc_pic -TARGETS := all verify clean cleanall cleanlift cleanjson cleangts cleantest recompile json gts -.PHONY : $(TARGETS) $(ENABLED_COMPILERS) +TARGETS := all verify repro-stash repro-check md5sum-check md5sum-update clean cleanall cleanlift cleanjson cleangts cleantest recompile json gts +.PHONY : $(TARGETS) $(ENABLED_COMPILERS) docker-start docker-stop $(TARGETS): $(ENABLED_COMPILERS) +# an empty test case directory may be present for a number of reasons (e.g. git branch switching). +# ignore such directories to avoid more cryptic errors later. +ifeq ($(C_SOURCE),) + +# more validity checks +ifneq ($(SPEC)$(EXTRA_SPEC),) + $(error invalid test case: "$(realpath .)" has .bpl or .spec files but no C file) +endif + +$(ENABLED_COMPILERS): + @echo 'note: skipping directory "$(realpath .)" with no C files...' + +else + $(ENABLED_COMPILERS): mkdir -p $@/ - # - continue if fails - -$(MAKE) -C $(realpath $@) -f $(GIT_ROOT)/src/test/make/$@.mk $(MAKECMDGOALS) + $(MAKE) -C $(realpath .)/$@ -f $(MAKE_DIR)/$@.mk $(MAKECMDGOALS) + +endif + diff --git a/src/test/make/lift.mk b/src/test/make/lift.mk index 755768f715..ff396ea43c 100644 --- a/src/test/make/lift.mk +++ b/src/test/make/lift.mk @@ -1,14 +1,67 @@ # Run from the directory basil/src/test/*/test_case/compilation_variant/ -$(LIFT_ARTEFACTS): a.out +MD5SUM_FILE := $(NAME).md5sum + +# compilation variant name. be careful of trailing whitespace! +CONFIG := $(notdir $(realpath .)) + +# override with config-specific lifter selection +ifdef LIFT_ARTEFACTS_$(CONFIG) + LIFT_ARTEFACTS := $(LIFT_ARTEFACTS_$(CONFIG)) +endif + +# $(info $(realpath .) config=$(CONFIG) artefacts=$(LIFT_ARTEFACTS)) + +all: $(LIFT_ARTEFACTS) + +$(NAME).relf: a.out $(READELF) -s -r -W a.out > $(NAME).relf - $(BAP) a.out -d adt:$(NAME).adt -d bir:$(NAME).bir - ddisasm a.out --ir $(NAME).gtirb - gtirb-semantics $(NAME).gtirb $(NAME).gts +$(NAME).adt $(NAME).bir &: a.out + $(BAP) a.out -d adt:temp.adt -d bir:temp.bir + $(MAKE_DIR)/bap-normalise.py temp.adt temp.bir + mv temp.adt $(NAME).adt && mv temp.bir $(NAME).bir + +# if gtirb is missing but required by .gts, do not attempt to build it. +# essentially, missing intermediate files (here, gtirb) will not trigger +# a re-build so long as its products (here, gts) are up-to-date with respect +# to inputs (here, a.out) +.SECONDARY: $(NAME).gtirb +$(NAME).gtirb: a.out + $(DDISASM) a.out --ir $(NAME).temp.gtirb + $(PROTO_JSON) --idem=proto -s8 $(NAME).temp.gtirb $(NAME).gtirb # normalises protobuffer encoding + rm $(NAME).temp.gtirb +$(NAME).gts: $(NAME).gtirb + $(GTIRB_SEMANTICS) $(NAME).gtirb $(NAME).gts + +repro-stash: $(LIFT_ARTEFACTS) + rm -rfv repro-stash && mkdir -p repro-stash && mv -v $(LIFT_ARTEFACTS) $(realpath .)/repro-stash + +repro-check: $(LIFT_ARTEFACTS) + [ -d repro-stash ] # repro-stash must be executed before repro-check + bash -xeu -c 'cd $(realpath .); for f in $(LIFT_ARTEFACTS); do diff --color -u repro-stash/$$f $$f; done' + +BASE_DIR := $(GIT_ROOT)/src/test +RELATIVE_DIR := $(shell realpath --relative-to $(BASE_DIR) .) + +md5sum-check: a.out $(LIFT_ARTEFACTS) +ifeq ($(USE_DOCKER), 1) + # $(DOCKER_CMD) hash > docker-hash-new + # diff --color -u docker-hash docker-hash-new # if this fails, make sure your docker image is up-to-date. + # rm docker-hash-new + cd $(BASE_DIR) && md5sum -c $(realpath .)/$(MD5SUM_FILE) # using docker; checking compiler output hashes. +else + echo "not running within docker; skipping docker image validation." + cd $(BASE_DIR) && md5sum -c $(realpath .)/$(MD5SUM_FILE) +endif + +# paths in md5sum are relative to src/test, to allow for collation into a big md5sums file +md5sum-update: a.out $(LIFT_ARTEFACTS) + cd $(BASE_DIR) && md5sum $(addprefix $(RELATIVE_DIR)/,$^) > $(RELATIVE_DIR)/$(MD5SUM_FILE) # $^ is all specified dependencies + # $(ENSURE_DOCKER) $(DOCKER_CMD) hash > docker-hash -ifdef $(SPEC) -BASIL_SPECARG = --spec $(SPEC) +ifdef $(SPEC) +BASIL_SPECARG = --spec $(SPEC) endif # optional; create basil @@ -19,26 +72,25 @@ $(NAME)_gtirb.bpl: $(LIFT_ARTEFACTS) $(SPEC) $(BASIL) java -jar $(BASIL) $(BASIL_FLAGS) --input $(NAME).gts --relf $(NAME).relf -o $(NAME)_gtirb.bpl $(BASIL_SPECARG) .PHONY=$(BASIL) -$(BASIL): +$(BASIL): cd $(GIT_ROOT) && sbt assembly -# don't re-lift only if binary is missing +# don't re-lift only if binary is missing .SECONDARY: a.out a.out: $(C_SOURCE) - $(CC) $(CFLAGS) $(C_SOURCE) + $(CC) $(CFLAGS) '$(C_SOURCE)' -.PHONY=recompile verify clean cleanlift cleanall cleanbin cleantest cleangts json gts +.PHONY=all recompile verify repro-stash repro-check md5sum-check md5sum-update clean cleanlift cleanall cleanbin cleantest cleangts cleanrepro json gts verify: $(NAME)_bap.bpl $(NAME)_gtirb.bpl recompile: a.out -gts: a.out - ddisasm a.out --ir $(NAME).gtirb - gtirb-semantics $(NAME).gtirb $(NAME).gts - rm -rf $(NAME).gtirb +gts: $(NAME).gts -json: - debug-gts.py $(NAME).gts > $(NAME).json +$(NAME).json: $(NAME).gts + $(DEBUG_GTS) $(NAME).gts > $(NAME).json + +json: $(NAME).json $(NAME)_bap_result.txt: $(NAME)_bap.bpl $(EXTRA_SPEC) bash -c "time boogie $(NAME)_bap.bpl $(EXTRA_SPEC) $(BOOGIE_FLAGS) | tee $(NAME)_bap_result.txt" @@ -46,9 +98,9 @@ $(NAME)_bap_result.txt: $(NAME)_bap.bpl $(EXTRA_SPEC) $(NAME)_gtirb_result.txt: $(NAME)_gtirb.bpl $(EXTRA_SPEC) bash -c "time boogie $(NAME)_gtirb.bpl $(EXTRA_SPEC) $(BOOGIE_FLAGS) | tee $(NAME)_gtirb_result.txt" -cleanall: clean cleanlift cleanbin cleantest cleanjson +cleanall: clean cleanrepro cleantest -cleantest: +cleantest: rm -rf $(NAME).bpl rm -rf $(NAME)_bap.bpl rm -rf $(NAME)_gtirb.bpl @@ -60,6 +112,9 @@ cleanbin: rm -rf a.out rm -rf $(NAME).gtirb +cleanrepro: + rm -rf repro-stash + clean: cleanlift cleanbin cleanjson cleanjson: @@ -72,4 +127,4 @@ cleanlift: rm -rf $(NAME).gts cleangts: - rm -rf $(NAME).gts \ No newline at end of file + rm -rf $(NAME).gts From e77d227f5df6090ebbfd68b4631743f88949c0bd Mon Sep 17 00:00:00 2001 From: rina Date: Thu, 17 Jul 2025 16:21:04 +1000 Subject: [PATCH 2/5] add readme --- src/test/readme.md | 413 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 413 insertions(+) create mode 100644 src/test/readme.md diff --git a/src/test/readme.md b/src/test/readme.md new file mode 100644 index 0000000000..098c886034 --- /dev/null +++ b/src/test/readme.md @@ -0,0 +1,413 @@ +# Writing and Running Tests + +See [docs/development/readme.md](../../docs/development/readme.md) for context. + +This tests in this repository are a number of source code files which must be +compiled (through gcc and clang) and lifted (by BAP or ddisasm + ASLp) +before they can be processed. +This lifting is done deterministically in a Docker container. + + + +For much more detail about the lifting process, including how to add or edit +test cases, keep reading. + +## Introduction + +The directories in src/test are organised by category, then by test case. +For example, `src/test/correct/arrays_simple` is a test case called "arrays_simple" +in the "correct" category (meaning the output is expected to verify through Boogie). +Within each test case folder, +there is a single C source file with the name `[TESTCASE].c`, and +there are subdirectories for each compiler template +("clang", "gcc_O2", etc). + +The compiler subdirectories are where the compiler/lifter outputs will go. +These are excluded from the Git repository and their expected hashes +are recorded in the md5sums file. +There is also a ".expected" file which records the Boogie code +produced by BASIL. +The presence or absence of this expected file tells the test infrastructure +whether BASIL is expected to successfully process that test case. + + +## Lifting reproducibly with Docker + +The compiled files can vary between compilers, operating systems, and tool versions. +When developing and testing BASIL, it is important that everyone has identical +versions of these files to make sure results are consistent and comparable. + +We use a Docker image (built through Nix) as a static +environment for compiling and lifting examples. +The hashes of generated files, as well as the versions of programs within the Docker image, +are recorded within this repository. +This ensures everything stays reproducible and that everybody has identical copies of +the lifted test cases. + +### Setting up Docker +1. Install Podman or Docker through your system's package manager. +2. To set up environment variables, run this command: + ```bash + cd src/test + eval $(make/docker-helper.sh env) + ``` + This will load a number of environment variables into your shell which are + recognised by the Makefiles and subsequent docker-helper.sh calls. + This also adds a `docker-helper.sh` alias into the current shell session. + To de-activate this environment, use the same command with `--unset` after "env". + + The environment can be configured by setting environment variables before running the "env" + subcommand. See the "Customising Docker behaviour" subsection below. + +3. After setting up the environment, to pull the Docker image [from GHCR](https://github.com/UQ-PAC/BASIL/pkgs/container/basil-tools-docker), run: + ```bash + docker-helper.sh pull + ``` + The Docker image will take about 5GB of your disk space. + Note that it is an x86_64-linux image. + If you are on a different architecture, + you should configure your Docker to virtualise the x86_64 Linux platform. + + (The tag of the image is of the form "flake-HASH-COMMIT" + where COMMIT is the pac-nix commit where it originates, + and HASH is a hash of the Nix flake path which produced it.) + +5. To start an instance of the Docker image, + ```bash + docker-helper.sh start + ``` + To stop the instance, use the "stop" subcommand. + + Inside the Docker container, the Git repository root will be available at the same file path. + Files on your computer outside this directory will be unavailable unless manually mounted. + + **If you are on MacOS**, see the section near bottom for important notes. + +6. The last two steps will have to be repeated if the Docker image changes. + +#### Customising Docker options + +Before running `docker-helper.sh env`, several environment variables can be set +to influence its behaviour: + +- `DOCKER` is the name of the Docker program (default: podman). +- `DOCKER_USER` is the name/ID of a user _inside_ the container, to be used + to run the lifter commands (default: root). +- `DOCKER_FLAKE` is a Nix flake reference to a streamed Docker image builder (default: read from docker-flake.txt). +- `DOCKER_IMAGE` is name of the image, including the registry URL (default: ghcr.io/uq-pac/basil-tools-docker) +- `DOCKER_TAG` is the tag of the image (default: hashed from DOCKER_FLAKE). + +The values of these variables will be printed by `eval $(docker-helper.sh env)`. +Note that if you want to change these variables, you should first deactivate the environment, +make the changes, then re-activate. +This will make sure dependent variables are updated correctly. +```bash +eval $(docker-helper.sh env --unset) +# set variables here +export DOCKER=docker +# ... etc +eval $(docker-helper.sh env) +``` + +To use traditional `docker` instead of `podman`: +- By default, the container runner is `podman`. Podman is + an implementation of the Docker protocol which is rootless by default. + Containers are run as your usual user, without needing a separate system + service. Change `DOCKER` to "docker" to use the original Docker program. +- If you are using the traditional `docker` program in a _non_ rootless setup, + you will have to set `DOCKER_USER` to `UID:GID` where UID/GID are is the user/group ID of + your _outside_ user (obtain with `id -u` and `id -g`). + This will make sure the produced files are editable by your ordinary user. + +To use a custom Docker image, change `DOCKER_IMAGE` and `DOCKER_TAG` as necessary. +Note that changing these will likely break the "build" and "pull" subcommands, as they +expect a certain format of tags. + +### Building with Docker + +When the docker-helper.sh environment is active, +running make commands should automatically use the compilers from Docker. + +1. To compile and lift all the examples, use + ```bash + cd src/test + make -j6 # adjust job count as appropriate + ``` + as usual. + The log of make commands should mention docker or podman + while executing. If you see errors about "no container", + make sure the container is started with the steps above. + + Again, **please see the MacOS notes** if relevant. + + + +#### Customising the lifting + +Instead of `make` which performs all tests in src/test, +you can restrict it to particular categories or test cases. + +To lift one category of tests, use, e.g., +```bash +make DIRS=correct +``` +To lift a single test case (and all its compiler templates), use +```bash +make SUBDIRS=correct/arrays_simple +``` +To lift only some compiler templates, use +```bash +make SUBDIRS=correct/arrays_simple ENABLED_COMPILERS=gcc +``` +Each of these commands can be suffixed with specific make targets (e.g. "clean", "cleanall"). +Multiple terms can be given by quoting them, e.g. `make DIRS='correct incorrect'` +(arguments will be shell-separated). + +### Updating or adding test cases +If you change the source code for a test or add a new test case, +you will have to update its hashes as well. +You can use these steps to do so. + +1. Make your changes in the `src/test/[CATEGORY]/[TESTNAME]` directory. + A new test directory should have at least a C source file + and, optionally, a specification file. + The Makefiles should automatically detect new test cases. If adding a new category, + add this to the DIRS variable in the root Makefile. + + Lifting options specified in make/lift-directories.mk can be overriden using the config.mk file in each + test directory. For example, to specify the enabled lifting templates (i.e., compilers and compiler flags): + ```console + $ cat correct/test_name/config.mk + ENABLED_COMPILERS = clang clang_pic gcc gcc_pic + ``` + To use only a certain lifter, you can restrict the LIFT\_ARTEFACTS variable to GTIRB\_ARTEFACTS or BAP\_ARTEFACTS. + ```console + $ cat src/test/correct/jumptable2/config.mk + LIFT_ARTEFACTS := $(COMMON_ARTEFACTS) $(GTIRB_ARTEFACTS) + ``` + +3. Make sure the Docker environment is active with the earlier set up steps. +4. Run `make` to compile and lift your new files. + If you are only changing a subset of tests, you can limit this using the DIRS or SUBDIRS arguments + as shown above. + + +12. Commit and PR your changes. + + + +### Updating the Docker image + +The Docker image is pinned by make/docker-flake.txt. +This is a string which can be passed to `nix build` to produce the Docker image. +The Docker image is an x86_64-linux image and can only be built on this platform. + +To update the Docker image, first make the desired changes +to the [pac-nix](https://github.com/katrinafyi/pac-nix) repository, +then update this text file to point to the +relevant commit in pac-nix. +On a x86_64-linux machine, run: +```bash +eval $(make/docker-helper.sh env --reset) +docker-helper.sh build +docker-helper.sh start +``` +(If needed, authenticate with Github by `gh auth login --scopes write:packages`. +Then, get the token with `gh auth token` +and use this, along with your Github username, +in `docker login ghcr.io` [docs](https://docs.github.com/en/packages/working-with-a-github-packages-registry/working-with-the-container-registry)) + +After this, +follow the steps from "updating / adding test cases". +Make sure the changes are as you expect (e.g., if you only updated ASLp, only the ASLp-related +hashes should change). + +If this is all fine, push the new Docker to GHCR +``` +docker-helper.sh push +``` +then commit and push the updated hashes to basil. +Make sure to include the docker-contents.txt +in your Git changes. + +In the basil repository, it is a good idea to update the docker-flake.txt +and the recorded hashes in the same commit. +This makes sure that at every commit, the test cases can be correctly +built with the corresponding docker-flake.txt. + + + +### Support on MacOS + +Although the tooling is primarily developed on Linux, +efforts are made to keep compatibility with MacOS. +However, there are important to note if you are using a MacOS computer. +If you find additional notes, please add them here. + +- You will need **GNU coreutils**. Install with `brew install coreutils`, + then update PATH to use these by default, e.g., + `export PATH="/opt/homebrew/opt/coreutils/libexec/gnubin:$PATH"`. +- You will need **GNU make 4.3+**. Install with `brew install make`, + then use `gmake` in place of make in all commands. + If you do not have this, lifting may inexplicably fail + with "file not found" errors for intermediate files. +- If you see error 137 while making, especially if this happens with high job + counts, + you may need to increase the CPUs / memory allocated to your container runner. + For example with podman, + ```bash + podman machine stop + podman machine set --cpus 6 --memory 8192 + podman machine start + ``` +- The Nix packages aslp, bap, ddisasm, and gtirb-semantics are available + for the MacOS platform. + For testing local examples, you can install these native programs then follow the + "ad-hoc lifting" section below. + +### Additional notes + +- You can run a command within the Docker container with `docker-helper.sh `. + Note that this will not work with commands needing user interaction (e.g. shells). +- To enter an interactive shell within the Docker container, use `docker-helper.sh shell`. + +## Lifting ad-hoc examples locally + +The easiest method to lift an example (without adding it to the test cases) +is to use the `nix-cc.sh` script [described here](https://uq-pac.github.io/BASIL/quick-lifting.html). +This uses a Nix build sandbox to provide some level of isolation and reproducibility, +but this is not as strict as the Docker image. + +If you would like to use the lifters/compilers available on your local machine, +you can use the Makefiles without the Docker environment active +(it can be deactivated with `eval $(docker-helper.sh env --unset)`). +This is useful if Docker is especially slow on your computer, +or if you want to test local changes to a lifter. + + +The Makefiles are configured to fall back to programs on your PATH. +These names may be OS-specific or LLVM-version-specific. +You can override these manually by setting some environment variables: +GCC, CLANG, READELF, BAP, DDISASM, PROTO_JSON, DEBUG_GTS, GTIRB_SEMANTICS. + From 1e784865d1ed7f03a315a3e3b48cf41e5497452b Mon Sep 17 00:00:00 2001 From: rina Date: Wed, 23 Jul 2025 14:09:40 +1000 Subject: [PATCH 3/5] move docs into mdbook --- docs/src/SUMMARY.md | 1 + .../src/development/integration-tests.md | 43 ++++++++-- docs/src/development/testing.md | 82 ++++++++----------- 3 files changed, 71 insertions(+), 55 deletions(-) rename src/test/readme.md => docs/src/development/integration-tests.md (92%) diff --git a/docs/src/SUMMARY.md b/docs/src/SUMMARY.md index dba0070a0f..4ecaead9a3 100644 --- a/docs/src/SUMMARY.md +++ b/docs/src/SUMMARY.md @@ -31,6 +31,7 @@ # Devleopment Guidelines - [Testing](development/testing.md) + - [Integration (system) tests](development/integration-tests.md) - [Assertions](development/assertions.md) - [Scala](development/scala.md) diff --git a/src/test/readme.md b/docs/src/development/integration-tests.md similarity index 92% rename from src/test/readme.md rename to docs/src/development/integration-tests.md index 098c886034..6daddafb0b 100644 --- a/src/test/readme.md +++ b/docs/src/development/integration-tests.md @@ -1,11 +1,25 @@ -# Writing and Running Tests +# Integration (system) tests -See [docs/development/readme.md](../../docs/development/readme.md) for context. +Basil's integration tests are made up of a number of C source code examples and stored under the src/test directory. +These tests exercise the entire Basil pipeline, from +loading the GTIRB/BAP data and transforming the Basil IR, to +emitting and verifying a Boogie file. + +These test cases are also called "system tests" and are tagged under +the `AnalysisSystemTest*` and `StandardSystemTest` tags. + +The tests must compiled (through gcc and clang) and +lifted (by BAP or ddisasm + ASLp) before they can be processed. +This lifting is done deterministically in a Docker container, +and the lifted files are committed into the repository. + +Most test files are organised into either `src/test/correct` for examples that should verify and `src/test/incorrect` +for examples that should not verify. +A small number of test cases for specific test suites are stored +in other subfolders of `src/test`. + + -This tests in this repository are a number of source code files which must be -compiled (through gcc and clang) and lifted (by BAP or ddisasm + ASLp) -before they can be processed. -This lifting is done deterministically in a Docker container. -For much more detail about the lifting process, including how to add or edit -test cases, keep reading. ## Introduction @@ -242,6 +254,19 @@ You can use these steps to do so. If you are only changing a subset of tests, you can limit this using the DIRS or SUBDIRS arguments as shown above. +5. After changing or re-lifting the the test files, + you should update the expected BASIL output files. + These `.expected` files store the Boogie output from + a past run of Basil and are used to detect changes in Basil's + output. + + First, run the system test suites, then run: + ```bash + ./mill updateExpectedBAP + ./mill updateExpectedGTIRB + ./mill updateExpectedExtraSpec + ``` + -12. Commit and PR your changes. +6. Commit and PR your changes.