From 06e1e8a1401e01812d641f7eae1a90beb41ff6fc Mon Sep 17 00:00:00 2001 From: rina Date: Mon, 22 Jun 2026 14:20:16 +1000 Subject: [PATCH 01/28] sort out lexprs --- lib/transforms/aslp/bincaml_ibi_make.ml | 101 ++++++++++++------------ test/transforms/test_aslp.ml | 42 ++++++++-- 2 files changed, 83 insertions(+), 60 deletions(-) diff --git a/lib/transforms/aslp/bincaml_ibi_make.ml b/lib/transforms/aslp/bincaml_ibi_make.ml index d8ec4988..f9347bf3 100644 --- a/lib/transforms/aslp/bincaml_ibi_make.ml +++ b/lib/transforms/aslp/bincaml_ibi_make.ml @@ -6,6 +6,16 @@ module Make (S : sig val bincaml_lifter_state : Aslp_state.lifter_state ref end) = struct + (** {2 Type definitions} *) + + type bigint = Z.t + type bitvector = Bitvec.t + type expr = Expr.BasilExpr.t + type lexpr = Aslp_lexpr.t + type stmt = Aslp_state.stmt + type branch = Branch of string + type ast = Aslp_state.aslp_state + (** {2 Bincaml-specific utility functions} *) let bincaml_emit stmt = @@ -23,18 +33,10 @@ struct id_name | Some x -> x in - Var.create id_name ty + Aslp_lexpr.Local (id_name, ty) (** {2 Instruction building interface implementation} *) - type bigint = Z.t - type bitvector = Bitvec.t - type expr = Expr.BasilExpr.t - type lexpr = Var.t - type stmt = Aslp_state.stmt - type branch = Branch of string - type ast = Aslp_state.aslp_state - let reset_ir () = let generator = !S.bincaml_lifter_state.generator in S.bincaml_lifter_state := Aslp_state.empty_lifter_state ~generator () @@ -159,37 +161,31 @@ struct let f_sdiv_int : bigint -> bigint -> bigint = fun _ -> failwith "sdiv int" let f_shl_int : bigint -> bigint -> bigint = fun _ -> failwith "shl int" - let v_PSTATE_C : lexpr = Var.create "v_PSTATE_C" (Types.Bitvector 1) - let v_PSTATE_Z : lexpr = Var.create "v_PSTATE_Z" (Types.Bitvector 1) - let v_PSTATE_V : lexpr = Var.create "v_PSTATE_V" (Types.Bitvector 1) - let v_PSTATE_N : lexpr = Var.create "v_PSTATE_N" (Types.Bitvector 1) - let v__PC : lexpr = Var.create "v__PC" (Types.Bitvector 1) - let v__R : lexpr = Var.create "v__R" (Types.Bitvector 0) - let v__Z : lexpr = Var.create "v__Z" (Types.Bitvector 0) - let v_SP_EL0 : lexpr = Var.create "v_SP_EL0" (Types.Bitvector 1) - let v_FPSR : lexpr = Var.create "v_FPSR" (Types.Bitvector 1) - let v_FPCR : lexpr = Var.create "v_FPCR" (Types.Bitvector 1) - let v_PSTATE_A : lexpr = Var.create "v_PSTATE_A" (Types.Bitvector 1) - let v_PSTATE_D : lexpr = Var.create "v_PSTATE_D" (Types.Bitvector 1) - let v_PSTATE_DIT : lexpr = Var.create "v_PSTATE_DIT" (Types.Bitvector 1) - let v_PSTATE_F : lexpr = Var.create "v_PSTATE_F" (Types.Bitvector 1) - let v_PSTATE_I : lexpr = Var.create "v_PSTATE_I" (Types.Bitvector 1) - let v_PSTATE_PAN : lexpr = Var.create "v_PSTATE_PAN" (Types.Bitvector 1) - let v_PSTATE_SP : lexpr = Var.create "v_PSTATE_SP" (Types.Bitvector 1) - let v_PSTATE_SSBS : lexpr = Var.create "v_PSTATE_SSBS" (Types.Bitvector 1) - let v_PSTATE_TCO : lexpr = Var.create "v_PSTATE_TCO" (Types.Bitvector 1) - let v_PSTATE_UAO : lexpr = Var.create "v_PSTATE_UAO" (Types.Bitvector 1) - let v_PSTATE_BTYPE : lexpr = Var.create "v_PSTATE_BTYPE" (Types.Bitvector 1) - - let v_BTypeCompatible : lexpr = - Var.create "v_BTypeCompatible" (Types.Bitvector 1) - - let v___BranchTaken : lexpr = Var.create "v___BranchTaken" (Types.Bitvector 1) - let v_BTypeNext : lexpr = Var.create "v_BTypeNext" (Types.Bitvector 1) - - let v___ExclusiveLocal : lexpr = - Var.create "v___ExclusiveLocal" (Types.Bitvector 1) - + let v_PSTATE_C : lexpr = PSTATE_C + let v_PSTATE_Z : lexpr = PSTATE_Z + let v_PSTATE_V : lexpr = PSTATE_V + let v_PSTATE_N : lexpr = PSTATE_N + let v__PC : lexpr = PC + let v__R : lexpr = R None + let v__Z : lexpr = Z None + let v_SP_EL0 : lexpr = SP_EL0 + let v_FPSR : lexpr = FPSR + let v_FPCR : lexpr = FPCR + let v_PSTATE_A : lexpr = PSTATE_A + let v_PSTATE_D : lexpr = PSTATE_D + let v_PSTATE_DIT : lexpr = PSTATE_DIT + let v_PSTATE_F : lexpr = PSTATE_F + let v_PSTATE_I : lexpr = PSTATE_I + let v_PSTATE_PAN : lexpr = PSTATE_PAN + let v_PSTATE_SP : lexpr = PSTATE_SP + let v_PSTATE_SSBS : lexpr = PSTATE_SSBS + let v_PSTATE_TCO : lexpr = PSTATE_TCO + let v_PSTATE_UAO : lexpr = PSTATE_UAO + let v_PSTATE_BTYPE : lexpr = PSTATE_BTYPE + let v_BTypeCompatible : lexpr = BTypeCompatible + let v___BranchTaken : lexpr = BranchTaken + let v_BTypeNext : lexpr = BTypeNext + let v___ExclusiveLocal : lexpr = ExclusiveLocal let f_switch_context : branch -> unit = fun _ -> failwith "f_switch_context" let f_gen_branch : expr -> branch * branch * branch = @@ -216,28 +212,29 @@ struct fun name size -> bincaml_local_var name (Types.Bitvector (Z.to_int size)) let f_decl_bool : string -> lexpr = fun _ -> failwith "f_decl_bool" - let f_gen_load : lexpr -> expr = fun lhs -> Expr.BasilExpr.rvar lhs + + let f_gen_load : lexpr -> expr = + fun lhs -> Expr.BasilExpr.rvar (Aslp_lexpr.to_var lhs) let f_gen_store : lexpr -> expr -> unit = fun lhs rhs -> bincaml_emit - (Stmt.Instr_Assign { attrib = Attrib.empty; al = [ (lhs, rhs) ] }) + (Stmt.Instr_Assign + { attrib = Attrib.empty; al = [ (Aslp_lexpr.to_var lhs, rhs) ] }) let f_gen_array_load : lexpr -> bigint -> expr = fun array idx -> - match Var.name array with - | "v__R" -> - f_gen_load (Var.create ("v__R" ^ Z.to_string idx) (Types.Bitvector 64)) - | x -> failwith x + match array with + | R None -> f_gen_load (R (Some (Z.to_int idx))) + | Z None -> f_gen_load (Z (Some (Z.to_int idx))) + | x -> failwith @@ "f_gen_array_load: " ^ Aslp_lexpr.show x let f_gen_array_store : lexpr -> bigint -> expr -> unit = fun array idx rhs -> - match Var.name array with - | "v__R" -> - f_gen_store - (Var.create ("v__R" ^ Z.to_string idx) (Types.Bitvector 64)) - rhs - | x -> failwith x + match array with + | R None -> f_gen_store (R (Some (Z.to_int idx))) rhs + | Z None -> f_gen_store (Z (Some (Z.to_int idx))) rhs + | x -> failwith @@ "f_gen_array_store: " ^ Aslp_lexpr.show x let f_gen_Elem_read : bigint -> bigint -> expr -> expr -> expr -> expr = fun _ -> failwith "f_gen_Elem_read" diff --git a/test/transforms/test_aslp.ml b/test/transforms/test_aslp.ml index 5c53ac56..f3a6f579 100644 --- a/test/transforms/test_aslp.ml +++ b/test/transforms/test_aslp.ml @@ -27,13 +27,15 @@ let%expect_test "lift: add x1, x2, x3, lsl #4" = (Bitvec.of_string "0x8b031041:bv32") in print_endline @@ Aslp_state.show_aslp_state x; - [%expect - {| + [%expect {| { Aslp_state.blocks = "block_0" -> { Aslp_state.assume = None; stmts = - [var var_0:bv64 := v__R2:bv64; var var_1:bv64 := v__R3:bv64; - var v__R1:bv64 := bvadd(var_0:bv64, bvshl(var_1:bv64, 0x4:bv12))]; + [var (Aslp_types.Lexpr.Local ("var_0", bv64)):bv64 := R2:bv64; + var (Aslp_types.Lexpr.Local ("var_1", bv64)):bv64 := R3:bv64; + var R1:bv64 := bvadd((Aslp_types.Lexpr.Local ("var_0", bv64)):bv64, + bvshl((Aslp_types.Lexpr.Local ("var_1", bv64)):bv64, 0x4:bv12)) + ]; succs = ["block_1"] }, "block_1" -> { Aslp_state.assume = None; stmts = []; succs = [] }; entry = "block_0"; exit = "block_1" } @@ -49,19 +51,43 @@ let%expect_test "lift 2x: mov x1, #0xabcd" = (Bitvec.of_string "0xd29579a1:bv32") in print_endline @@ Aslp_state.show_aslp_state x; - [%expect - {| + [%expect {| { Aslp_state.blocks = "block_0" - -> { Aslp_state.assume = None; stmts = [var v__R1:bv64 := 0xabcd:bv64]; + -> { Aslp_state.assume = None; stmts = [var R1:bv64 := 0xabcd:bv64]; succs = ["block_1"] }, "block_1" -> { Aslp_state.assume = None; stmts = []; succs = ["block_2"] }, "block_2" - -> { Aslp_state.assume = None; stmts = [var v__R1:bv64 := 0xabcd:bv64]; + -> { Aslp_state.assume = None; stmts = [var R1:bv64 := 0xabcd:bv64]; succs = ["block_3"] }, "block_3" -> { Aslp_state.assume = None; stmts = []; succs = [] }; entry = "block_0"; exit = "block_3" } |}] +let%expect_test "lift: b.eq #1024" = + let module I = (val Bincaml_ibi.from_generator (Aslp_state.empty_aslp_ids ())) + in + let x = + lift_opcode + (module I) + ~address:(Bitvec.zero ~size:64) + (Bitvec.of_string "0x54002000:bv32") + in + print_endline @@ Aslp_state.show_aslp_state x; + [%expect.unreachable] +[@@expect.uncaught_exn {| + (* CR expect_test_collector: This test expectation appears to contain a backtrace. + This is strongly discouraged as backtraces are fragile. + Please change this test to not include a backtrace. *) + (Failure f_gen_eq_bits) + Raised at Stdlib.failwith in file "stdlib.ml", line 29, characters 17-33 + Called from OfflineASL_pc__Aarch64_branch_conditional_cond.f_aarch64_branch_conditional_cond in file "lib/pc/aarch64_branch_conditional_cond.ml", line 9, characters 28-162 + Called from Transforms__Aslp.lift_opcode.(fun) in file "lib/transforms/aslp/aslp.ml", line 18, characters 6-67 + Called from Stdlib__Fun.protect in file "fun.ml", line 34, characters 8-15 + Re-raised at Stdlib__Fun.protect in file "fun.ml", line 39, characters 6-52 + Called from Test_aslp.(fun) in file "test/transforms/test_aslp.ml", lines 79-82, characters 4-42 + Called from Ppx_expect_runtime__Test_block.Configured.dump_backtrace in file "runtime/test_block.ml", line 142, characters 10-28 + |}] + let%expect_test "aslp integration basic" = let lst = Loader.Loadir.ast_of_string From b351ec65b0c8d5718bc2312dbf81d20f39dfcc52 Mon Sep 17 00:00:00 2001 From: rina Date: Mon, 22 Jun 2026 14:32:40 +1000 Subject: [PATCH 02/28] fix potential id clash because we were post-processing the unique IDs dont need helper functions anymore --- lib/transforms/aslp/aslp_state.ml | 22 +++++++++------------- lib/transforms/aslp/bincaml_ibi_make.ml | 4 +--- test/transforms/test_aslp.ml | 18 +++++++++--------- 3 files changed, 19 insertions(+), 25 deletions(-) diff --git a/lib/transforms/aslp/aslp_state.ml b/lib/transforms/aslp/aslp_state.ml index 0c6e88c5..0b43bb59 100644 --- a/lib/transforms/aslp/aslp_state.ml +++ b/lib/transforms/aslp/aslp_state.ml @@ -41,7 +41,7 @@ type aslp_state = { Alone, this is used to represent the lifter state {i between} instructions. Or, it forms a part of {!lifter_state} for {i within}-instruction state. *) -type aslp_ids = { block_ids : unit -> int; local_ids : unit -> int } +type aslp_ids = { block_id : unit -> string; local_id : unit -> string } (** Generators for unique IDs used by the offline lifter. The {!aslp_ids} is stateful and the same {!aslp_ids} should be used by all @@ -78,16 +78,13 @@ let empty_aslp_state ~entry ~exit () = in { blocks; entry; exit } -let gen_block_id gens = Printf.sprintf "block_%d" @@ gens.block_ids () -let gen_local_id gens = Printf.sprintf "var_%d" @@ gens.local_ids () - (** Constructs a new empty {!lifter_state}. Callers should consider whether they wish to re-use an existing [generator] value by passing it explicitly. *) let empty_lifter_state ~generator () = - let entry = gen_block_id generator in - let exit = gen_block_id generator in + let entry = generator.block_id () in + let exit = generator.block_id () in { active = entry; state = empty_aslp_state ~entry ~exit (); @@ -100,7 +97,9 @@ let empty_lifter_state ~generator () = Be careful! You should use {!aslp_ids_from_generators} if you will use the lifted statements within an existing Bincaml IR. *) let empty_aslp_ids () = - { block_ids = Fix.Gensym.make (); local_ids = Fix.Gensym.make () } + let block_id = Printf.sprintf "block_%d" % Fix.Gensym.make () + and local_id = Printf.sprintf "var_%d" % Fix.Gensym.make () in + { block_id; local_id } (** {2 ID-generating functions} *) @@ -110,12 +109,9 @@ let empty_aslp_ids () = This will ensure that ASLp's local variable and block names do not clash with existing names. *) let aslp_ids_from_generators ~block_ids ~local_ids = - let block_ids = ID.index % ID.fresh block_ids in - let local_ids = ID.index % ID.fresh local_ids in - { block_ids; local_ids } - -let gen_block_id = gen_block_id -let gen_local_id = gen_local_id + let block_id = ID.name % ID.fresh ~name:"block" block_ids + and local_id = ID.name % ID.fresh ~name:"var" local_ids in + { block_id; local_id } (** {1 State manipulation functions} *) diff --git a/lib/transforms/aslp/bincaml_ibi_make.ml b/lib/transforms/aslp/bincaml_ibi_make.ml index f9347bf3..e8b16d1c 100644 --- a/lib/transforms/aslp/bincaml_ibi_make.ml +++ b/lib/transforms/aslp/bincaml_ibi_make.ml @@ -26,9 +26,7 @@ struct let id_name = match Hashtbl.find_opt !S.bincaml_lifter_state.names name with | None -> - let id_name = - Aslp_state.gen_local_id !S.bincaml_lifter_state.generator - in + let id_name = !S.bincaml_lifter_state.generator.local_id () in Hashtbl.replace !S.bincaml_lifter_state.names name id_name; id_name | Some x -> x diff --git a/test/transforms/test_aslp.ml b/test/transforms/test_aslp.ml index f3a6f579..8c5c3200 100644 --- a/test/transforms/test_aslp.ml +++ b/test/transforms/test_aslp.ml @@ -27,15 +27,13 @@ let%expect_test "lift: add x1, x2, x3, lsl #4" = (Bitvec.of_string "0x8b031041:bv32") in print_endline @@ Aslp_state.show_aslp_state x; - [%expect {| + [%expect + {| { Aslp_state.blocks = "block_0" -> { Aslp_state.assume = None; stmts = - [var (Aslp_types.Lexpr.Local ("var_0", bv64)):bv64 := R2:bv64; - var (Aslp_types.Lexpr.Local ("var_1", bv64)):bv64 := R3:bv64; - var R1:bv64 := bvadd((Aslp_types.Lexpr.Local ("var_0", bv64)):bv64, - bvshl((Aslp_types.Lexpr.Local ("var_1", bv64)):bv64, 0x4:bv12)) - ]; + [var var_0:bv64 := R2:bv64; var var_1:bv64 := R3:bv64; + var R1:bv64 := bvadd(var_0:bv64, bvshl(var_1:bv64, 0x4:bv12))]; succs = ["block_1"] }, "block_1" -> { Aslp_state.assume = None; stmts = []; succs = [] }; entry = "block_0"; exit = "block_1" } @@ -51,7 +49,8 @@ let%expect_test "lift 2x: mov x1, #0xabcd" = (Bitvec.of_string "0xd29579a1:bv32") in print_endline @@ Aslp_state.show_aslp_state x; - [%expect {| + [%expect + {| { Aslp_state.blocks = "block_0" -> { Aslp_state.assume = None; stmts = [var R1:bv64 := 0xabcd:bv64]; succs = ["block_1"] }, @@ -74,7 +73,8 @@ let%expect_test "lift: b.eq #1024" = in print_endline @@ Aslp_state.show_aslp_state x; [%expect.unreachable] -[@@expect.uncaught_exn {| +[@@expect.uncaught_exn + {| (* CR expect_test_collector: This test expectation appears to contain a backtrace. This is strongly discouraged as backtraces are fragile. Please change this test to not include a backtrace. *) @@ -84,7 +84,7 @@ let%expect_test "lift: b.eq #1024" = Called from Transforms__Aslp.lift_opcode.(fun) in file "lib/transforms/aslp/aslp.ml", line 18, characters 6-67 Called from Stdlib__Fun.protect in file "fun.ml", line 34, characters 8-15 Re-raised at Stdlib__Fun.protect in file "fun.ml", line 39, characters 6-52 - Called from Test_aslp.(fun) in file "test/transforms/test_aslp.ml", lines 79-82, characters 4-42 + Called from Test_aslp.(fun) in file "test/transforms/test_aslp.ml", lines 69-72, characters 4-42 Called from Ppx_expect_runtime__Test_block.Configured.dump_backtrace in file "runtime/test_block.ml", line 142, characters 10-28 |}] From e3f554588cf64f94739c5df0d9c9dc5831a8169e Mon Sep 17 00:00:00 2001 From: rina Date: Mon, 22 Jun 2026 14:50:23 +1000 Subject: [PATCH 03/28] fill out bv op gens --- lib/transforms/aslp/aslp_state.ml | 2 +- lib/transforms/aslp/bincaml_ibi_make.ml | 54 +++++++++++++++---------- test/transforms/test_aslp.ml | 4 +- 3 files changed, 35 insertions(+), 25 deletions(-) diff --git a/lib/transforms/aslp/aslp_state.ml b/lib/transforms/aslp/aslp_state.ml index 0b43bb59..763d8506 100644 --- a/lib/transforms/aslp/aslp_state.ml +++ b/lib/transforms/aslp/aslp_state.ml @@ -51,7 +51,7 @@ type lifter_state = { active : string; (** Active block where new runtime statements will be appended. *) state : aslp_state; (** Lifter state representing a control flow diamond. *) - generator : aslp_ids; [@opaque] (** Generators for ID numbers. *) + generator : aslp_ids; [@opaque] (** Generators for ID names. *) names : (string, string) Hashtbl.t; (** Map of ASLp local variable names to the "ID-ified" names produced for Bincaml. diff --git a/lib/transforms/aslp/bincaml_ibi_make.ml b/lib/transforms/aslp/bincaml_ibi_make.ml index e8b16d1c..c4e19d66 100644 --- a/lib/transforms/aslp/bincaml_ibi_make.ml +++ b/lib/transforms/aslp/bincaml_ibi_make.ml @@ -260,62 +260,67 @@ struct let f_gen_AArch64_MemTag_read : expr -> expr -> expr = fun _ -> failwith "f_gen_AArch64_MemTag_read" - let f_gen_and_bool : expr -> expr -> expr = fun _ -> failwith "f_gen_and_bool" - let f_gen_or_bool : expr -> expr -> expr = fun _ -> failwith "f_gen_or_bool" - let f_gen_not_bool : expr -> expr = fun _ -> failwith "f_gen_not_bool" + let f_gen_and_bool : expr -> expr -> expr = + fun a b -> Expr.BasilExpr.applyintrin ~op:`AND [ a; b ] + + let f_gen_or_bool : expr -> expr -> expr = + fun a b -> Expr.BasilExpr.applyintrin ~op:`OR [ a; b ] + + let f_gen_not_bool : expr -> expr = + fun a -> Expr.BasilExpr.unexp ~op:`BoolNOT a let f_gen_cvt_bits_uint : bigint -> expr -> expr = fun _ -> failwith "f_gen_cvt_bits_uint" let f_gen_eq_bits : bigint -> expr -> expr -> expr = - fun _ -> failwith "f_gen_eq_bits" + fun _ a b -> Expr.BasilExpr.binexp ~op:`EQ a b let f_gen_ne_bits : bigint -> expr -> expr -> expr = - fun _ -> failwith "f_gen_ne_bits" + fun _ a b -> Expr.BasilExpr.binexp ~op:`EQ a b let f_gen_not_bits : bigint -> expr -> expr = - fun _ -> failwith "f_gen_not_bits" + fun _ a -> Expr.BasilExpr.unexp ~op:`BVNOT a let f_gen_cvt_bool_bv : expr -> expr = fun _ -> failwith "f_gen_cvt_bool_bv" let f_gen_or_bits : bigint -> expr -> expr -> expr = - fun _ -> failwith "f_gen_or_bits" + fun _ a b -> Expr.BasilExpr.applyintrin ~op:`BVOR [ a; b ] let f_gen_eor_bits : bigint -> expr -> expr -> expr = - fun _ -> failwith "f_gen_eor_bits" + fun _ a b -> Expr.BasilExpr.applyintrin ~op:`BVXOR [ a; b ] let f_gen_and_bits : bigint -> expr -> expr -> expr = - fun _ -> failwith "f_gen_and_bits" + fun _ a b -> Expr.BasilExpr.applyintrin ~op:`BVAND [ a; b ] let f_gen_add_bits : bigint -> expr -> expr -> expr = - fun _ -> Expr.BasilExpr.binexp ?attrib:None ~op:`BVADD + fun _ a b -> Expr.BasilExpr.applyintrin ~op:`BVADD [ a; b ] let f_gen_sub_bits : bigint -> expr -> expr -> expr = - fun _ -> failwith "f_gen_sub_bits" + fun _ a b -> Expr.BasilExpr.binexp ~op:`BVSUB a b let f_gen_sdiv_bits : bigint -> expr -> expr -> expr = - fun _ -> failwith "f_gen_sdiv_bits" + fun _ a b -> Expr.BasilExpr.binexp ~op:`BVSDIV a b let f_gen_sle_bits : bigint -> expr -> expr -> expr = - fun _ -> failwith "f_gen_sle_bits" + fun _ a b -> Expr.BasilExpr.binexp ~op:`BVSLE a b let f_gen_slt_bits : bigint -> expr -> expr -> expr = - fun _ -> failwith "f_gen_slt_bits" + fun _ a b -> Expr.BasilExpr.binexp ~op:`BVSLT a b let f_gen_mul_bits : bigint -> expr -> expr -> expr = - fun _ -> failwith "f_gen_mul_bits" + fun _ a b -> Expr.BasilExpr.applyintrin ~op:`BVMUL [ a; b ] let f_gen_append_bits : bigint -> bigint -> expr -> expr -> expr = - fun _ -> failwith "f_gen_append_bits" + fun _ _ a b -> Expr.BasilExpr.applyintrin ~op:`BVConcat [ a; b ] let f_gen_lsr_bits : bigint -> bigint -> expr -> expr -> expr = - fun _ -> failwith "f_gen_lsr_bits" + fun _ _ a b -> Expr.BasilExpr.binexp ~op:`BVLSHR a b let f_gen_lsl_bits : bigint -> bigint -> expr -> expr -> expr = - fun _ _ -> Expr.BasilExpr.binexp ?attrib:None ~op:`BVSHL + fun _ _ a b -> Expr.BasilExpr.binexp ~op:`BVSHL a b let f_gen_asr_bits : bigint -> bigint -> expr -> expr -> expr = - fun _ -> failwith "f_gen_asr_bits" + fun _ _ a b -> Expr.BasilExpr.binexp ~op:`BVASHR a b (** [f_gen_replicate_bits operand_width num_replications operand num_replications] *) @@ -324,14 +329,19 @@ struct (** [f_gen_ZeroExtend operand_width result_width operand result_width] *) let f_gen_ZeroExtend : bigint -> bigint -> expr -> expr -> expr = - fun _ -> failwith "f_gen_ZeroExtend" + fun op_wd final_wd x _ -> + Expr.BasilExpr.zero_extend ~n_prefix_bits:Z.(to_int (final_wd - op_wd)) x (** [f_gen_SignExtend operand_width result_width operand result_width] *) let f_gen_SignExtend : bigint -> bigint -> expr -> expr -> expr = - fun _ -> failwith "f_gen_SignExtend" + fun op_wd final_wd x _ -> + Expr.BasilExpr.sign_extend ~n_prefix_bits:Z.(to_int (final_wd - op_wd)) x + (** [f_gen_slice x lo wd] *) let f_gen_slice : expr -> bigint -> bigint -> expr = - fun _ -> failwith "f_gen_slice" + fun x lo_incl wd -> + let hi_excl = Z.(to_int (lo_incl - wd)) and lo_incl = Z.to_int lo_incl in + Expr.BasilExpr.extract ~lo_incl ~hi_excl x (* {1 Floating point intrinsics} *) diff --git a/test/transforms/test_aslp.ml b/test/transforms/test_aslp.ml index 8c5c3200..e8b547a1 100644 --- a/test/transforms/test_aslp.ml +++ b/test/transforms/test_aslp.ml @@ -78,9 +78,9 @@ let%expect_test "lift: b.eq #1024" = (* CR expect_test_collector: This test expectation appears to contain a backtrace. This is strongly discouraged as backtraces are fragile. Please change this test to not include a backtrace. *) - (Failure f_gen_eq_bits) + (Failure f_gen_branch) Raised at Stdlib.failwith in file "stdlib.ml", line 29, characters 17-33 - Called from OfflineASL_pc__Aarch64_branch_conditional_cond.f_aarch64_branch_conditional_cond in file "lib/pc/aarch64_branch_conditional_cond.ml", line 9, characters 28-162 + Called from OfflineASL_pc__Aarch64_branch_conditional_cond.f_aarch64_branch_conditional_cond in file "lib/pc/aarch64_branch_conditional_cond.ml", line 42, characters 16-63 Called from Transforms__Aslp.lift_opcode.(fun) in file "lib/transforms/aslp/aslp.ml", line 18, characters 6-67 Called from Stdlib__Fun.protect in file "fun.ml", line 34, characters 8-15 Re-raised at Stdlib__Fun.protect in file "fun.ml", line 39, characters 6-52 From ba30114f0176fdab1b98d492828df1ed3b8c833b Mon Sep 17 00:00:00 2001 From: rina Date: Mon, 22 Jun 2026 15:15:47 +1000 Subject: [PATCH 04/28] has_pc_assign --- lib/transforms/aslp/aslp_state.ml | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/lib/transforms/aslp/aslp_state.ml b/lib/transforms/aslp/aslp_state.ml index 763d8506..744f29e6 100644 --- a/lib/transforms/aslp/aslp_state.ml +++ b/lib/transforms/aslp/aslp_state.ml @@ -17,11 +17,18 @@ type aslp_block = { stmts : stmt CCVector.vector; [@printer Format.map CCVector.to_list pp_stmt_list_for_printing] succs : string list; + has_pc_assign : bool; + (** Whether this block (or any of its same-instruction predecessors) have + explicitly assigned to the [PC] variable. *) } [@@deriving show] (** An ASLp lifter block is a list of statements followed by a non-deterministic goto to a number of successors. Each block is optionally guarded by an - assume statement. *) + assume statement. + + We maintain the invariant that at the end of every {!aslp_block}, the [PC] + variable is either assigned on all paths or assigned on no paths (from the + beginning of the instruction). *) type aslp_state = { blocks : aslp_block StringMap.t; @@ -69,11 +76,14 @@ type lifter_state = { (** {1 Utility functions} *) let empty_aslp_state ~entry ~exit () = + let assume = None + and has_pc_assign = false + and stmts () = CCVector.create () in let blocks = StringMap.of_list [ - (entry, { assume = None; stmts = CCVector.create (); succs = [ exit ] }); - (exit, { assume = None; stmts = CCVector.create (); succs = [] }); + (entry, { assume; stmts = stmts (); succs = [ exit ]; has_pc_assign }); + (exit, { assume; stmts = stmts (); succs = []; has_pc_assign }); ] in { blocks; entry; exit } From f7d5ece91cef6d8565baeb6f3736c9f4287174fb Mon Sep 17 00:00:00 2001 From: rina Date: Mon, 22 Jun 2026 15:51:49 +1000 Subject: [PATCH 05/28] ensure pc consistcne --- lib/transforms/aslp/aslp_state.ml | 27 +++++++++++++++++++------ lib/transforms/aslp/bincaml_ibi_make.ml | 24 +++++++++++++++++++++- 2 files changed, 44 insertions(+), 7 deletions(-) diff --git a/lib/transforms/aslp/aslp_state.ml b/lib/transforms/aslp/aslp_state.ml index 744f29e6..17927986 100644 --- a/lib/transforms/aslp/aslp_state.ml +++ b/lib/transforms/aslp/aslp_state.ml @@ -24,11 +24,7 @@ type aslp_block = { [@@deriving show] (** An ASLp lifter block is a list of statements followed by a non-deterministic goto to a number of successors. Each block is optionally guarded by an - assume statement. - - We maintain the invariant that at the end of every {!aslp_block}, the [PC] - variable is either assigned on all paths or assigned on no paths (from the - beginning of the instruction). *) + assume statement. *) type aslp_state = { blocks : aslp_block StringMap.t; @@ -137,7 +133,26 @@ let add_stmt_to_block state key stmt = in { state with blocks } -let add_stmt_to_active (lifter_state : lifter_state) stmt = +(** Ensures that the given block ID has a PC assignment on all paths. If it + already {!has_pc_assign}, no changes are made. *) +let ensure_pc_assigned key state = + let blocks = state.blocks in + let block = + StringMap.find_opt key blocks + |> Option.get_exn_or "ensure_pc_assigned: block not found" + in + if not block.has_pc_assign then ( + let pc = Aslp_lexpr.to_var PC in + let incremented = + Expr.BasilExpr.(applyintrin ~op:`BVADD [ rvar pc; bv_of_int ~size:32 4 ]) + in + CCVector.push block.stmts + (Stmt.Instr_Assign { attrib = Attrib.empty; al = [ (pc, incremented) ] }); + let blocks = StringMap.add key { block with has_pc_assign = true } blocks in + { state with blocks }) + else state + +let add_stmt_to_active stmt (lifter_state : lifter_state) = let state = add_stmt_to_block lifter_state.state lifter_state.active stmt in { lifter_state with state } diff --git a/lib/transforms/aslp/bincaml_ibi_make.ml b/lib/transforms/aslp/bincaml_ibi_make.ml index c4e19d66..54d38acc 100644 --- a/lib/transforms/aslp/bincaml_ibi_make.ml +++ b/lib/transforms/aslp/bincaml_ibi_make.ml @@ -18,9 +18,11 @@ struct (** {2 Bincaml-specific utility functions} *) + (** Emits the given Bincaml statement. *) + let bincaml_emit stmt = S.bincaml_lifter_state := - Aslp_state.add_stmt_to_active !S.bincaml_lifter_state stmt + !S.bincaml_lifter_state |> Aslp_state.add_stmt_to_active stmt let bincaml_local_var name ty = let id_name = @@ -33,6 +35,26 @@ struct in Aslp_lexpr.Local (id_name, ty) + (** Ensures that the two given {!aslp_block} keys agree on their + {!has_pc_assign} property, before moving to a control-flow join of the two + blocks. + + This is used to maintain the invariant that at every control flow point, + the [PC] variable is either assigned on all paths or assigned on no paths + (from the beginning of the instruction). *) + let bincaml_ensure_pc_consistency left right = + let state = !S.bincaml_lifter_state.state in + if + (StringMap.find left state.blocks).has_pc_assign + || (StringMap.find right state.blocks).has_pc_assign + then + let state = + state + |> Aslp_state.ensure_pc_assigned left + |> Aslp_state.ensure_pc_assigned right + in + S.bincaml_lifter_state := { !S.bincaml_lifter_state with state } + (** {2 Instruction building interface implementation} *) let reset_ir () = From efcdda4bcd5e14a5642bd57a2997a4ff398add17 Mon Sep 17 00:00:00 2001 From: rina Date: Mon, 22 Jun 2026 16:17:07 +1000 Subject: [PATCH 06/28] add_goto --- lib/transforms/aslp/aslp_state.ml | 46 +++++++++++++++---------- lib/transforms/aslp/bincaml_ibi_make.ml | 13 +++++-- 2 files changed, 38 insertions(+), 21 deletions(-) diff --git a/lib/transforms/aslp/aslp_state.ml b/lib/transforms/aslp/aslp_state.ml index 17927986..45352a12 100644 --- a/lib/transforms/aslp/aslp_state.ml +++ b/lib/transforms/aslp/aslp_state.ml @@ -71,17 +71,16 @@ type lifter_state = { (** {1 Utility functions} *) +let empty_block () = + { + assume = None; + stmts = CCVector.create (); + succs = []; + has_pc_assign = false; + } + let empty_aslp_state ~entry ~exit () = - let assume = None - and has_pc_assign = false - and stmts () = CCVector.create () in - let blocks = - StringMap.of_list - [ - (entry, { assume; stmts = stmts (); succs = [ exit ]; has_pc_assign }); - (exit, { assume; stmts = stmts (); succs = []; has_pc_assign }); - ] - in + let blocks = StringMap.singleton entry (empty_block ()) in { blocks; entry; exit } (** Constructs a new empty {!lifter_state}. @@ -133,6 +132,10 @@ let add_stmt_to_block state key stmt = in { state with blocks } +let add_stmt_to_active stmt (lifter_state : lifter_state) = + let state = add_stmt_to_block lifter_state.state lifter_state.active stmt in + { lifter_state with state } + (** Ensures that the given block ID has a PC assignment on all paths. If it already {!has_pc_assign}, no changes are made. *) let ensure_pc_assigned key state = @@ -152,19 +155,26 @@ let ensure_pc_assigned key state = { state with blocks }) else state -let add_stmt_to_active stmt (lifter_state : lifter_state) = - let state = add_stmt_to_block lifter_state.state lifter_state.active stmt in - { lifter_state with state } - +(** Adds a new goto edge from [source] to [target]. If [source] was the exit + block, sets {!exit} to be [target]. *) let add_goto aslp_state ~source ~target = let blocks = StringMap.update source (function | Some blk -> Some { blk with succs = target :: blk.succs } - | _ -> failwith "add_goto: block not found") + | _ -> failwith "add_goto: source block not found") aslp_state.blocks in - { aslp_state with blocks } + let exit = + if String.equal source aslp_state.exit then target else aslp_state.exit + in + { aslp_state with blocks; exit } + +(** Creates a new block with the given name as a successor of the given [pred]. + If [pred] was the exit block, sets {!exit} to be the new block. *) +let add_succ aslp_state ~pred ~name = + let blocks = aslp_state.blocks |> StringMap.add name (empty_block ()) in + { aslp_state with blocks } |> add_goto ~source:pred ~target:name let append_aslp_states first second = let f key = function @@ -172,9 +182,7 @@ let append_aslp_states first second = | `Left a | `Right a -> Some a in let blocks = StringMap.merge_safe ~f first.blocks second.blocks in - add_goto - { blocks; entry = first.entry; exit = second.exit } - ~source:first.exit ~target:second.entry + { first with blocks } |> add_goto ~source:first.exit ~target:second.entry (** {1 Formatters} *) diff --git a/lib/transforms/aslp/bincaml_ibi_make.ml b/lib/transforms/aslp/bincaml_ibi_make.ml index 54d38acc..588f33ee 100644 --- a/lib/transforms/aslp/bincaml_ibi_make.ml +++ b/lib/transforms/aslp/bincaml_ibi_make.ml @@ -13,7 +13,15 @@ struct type expr = Expr.BasilExpr.t type lexpr = Aslp_lexpr.t type stmt = Aslp_state.stmt - type branch = Branch of string + + type branch = + | Branch of { + this : [ `True | `False | `Merge ]; + t : string; + f : string; + m : string; + } + type ast = Aslp_state.aslp_state (** {2 Bincaml-specific utility functions} *) @@ -209,7 +217,8 @@ struct let f_switch_context : branch -> unit = fun _ -> failwith "f_switch_context" let f_gen_branch : expr -> branch * branch * branch = - fun _ -> failwith "f_gen_branch" + fun cond -> + failwith "f_gen_branch" let f_true_branch : branch * branch * branch -> branch = fun _ -> failwith "f_true_branch" From b493102f61d89c89b6683f5114a93bf8b6282681 Mon Sep 17 00:00:00 2001 From: rina Date: Mon, 22 Jun 2026 16:37:49 +1000 Subject: [PATCH 07/28] a gen branch might split a block down the middle :( --- lib/transforms/aslp/bincaml_ibi_make.ml | 40 +++++++++++++------------ 1 file changed, 21 insertions(+), 19 deletions(-) diff --git a/lib/transforms/aslp/bincaml_ibi_make.ml b/lib/transforms/aslp/bincaml_ibi_make.ml index 588f33ee..d04c1847 100644 --- a/lib/transforms/aslp/bincaml_ibi_make.ml +++ b/lib/transforms/aslp/bincaml_ibi_make.ml @@ -13,15 +13,7 @@ struct type expr = Expr.BasilExpr.t type lexpr = Aslp_lexpr.t type stmt = Aslp_state.stmt - - type branch = - | Branch of { - this : [ `True | `False | `Merge ]; - t : string; - f : string; - m : string; - } - + type branch = { this : string; t : string; f : string; m : string } type ast = Aslp_state.aslp_state (** {2 Bincaml-specific utility functions} *) @@ -214,21 +206,31 @@ struct let v___BranchTaken : lexpr = BranchTaken let v_BTypeNext : lexpr = BTypeNext let v___ExclusiveLocal : lexpr = ExclusiveLocal - let f_switch_context : branch -> unit = fun _ -> failwith "f_switch_context" let f_gen_branch : expr -> branch * branch * branch = fun cond -> - failwith "f_gen_branch" - - let f_true_branch : branch * branch * branch -> branch = - fun _ -> failwith "f_true_branch" - - let f_false_branch : branch * branch * branch -> branch = - fun _ -> failwith "f_false_branch" + let active = !S.bincaml_lifter_state.active in + let state = !S.bincaml_lifter_state.state in + let block_id = !S.bincaml_lifter_state.generator.block_id in + let t = block_id () and f = block_id () and m = block_id () in + let state = + state + |> Aslp_state.add_succ ~pred:active ~name:t + |> Aslp_state.add_succ ~pred:active ~name:f + |> Aslp_state.add_succ ~pred:t ~name:m + |> Aslp_state.add_goto ~source:f ~target:m + in + S.bincaml_lifter_state := { !S.bincaml_lifter_state with state }; + let tbranch = { this = t; t; f; m } in + (tbranch, { tbranch with this = f }, { tbranch with this = m }) - let f_merge_branch : branch * branch * branch -> branch = - fun _ -> failwith "f_merge_branch" + let f_switch_context : branch -> unit = + fun b -> + S.bincaml_lifter_state := { !S.bincaml_lifter_state with active = b.this } + let f_true_branch : branch * branch * branch -> branch = fun (t, f, m) -> t + let f_false_branch : branch * branch * branch -> branch = fun (t, f, m) -> f + let f_merge_branch : branch * branch * branch -> branch = fun (t, f, m) -> m let f_gen_assert : expr -> unit = fun _ -> failwith "f_gen_assert" let f_gen_bit_lit : bigint -> bitvector -> expr = From 8b676610a237863b4afcee293a7275815b7005c3 Mon Sep 17 00:00:00 2001 From: rina Date: Mon, 22 Jun 2026 17:50:05 +1000 Subject: [PATCH 08/28] noncrashing --- lib/transforms/aslp/aslp_state.ml | 29 ++++++++---- lib/transforms/aslp/bincaml_ibi_make.ml | 19 ++++++-- test/transforms/test_aslp.ml | 63 +++++++++++++++---------- 3 files changed, 72 insertions(+), 39 deletions(-) diff --git a/lib/transforms/aslp/aslp_state.ml b/lib/transforms/aslp/aslp_state.ml index 45352a12..d176e605 100644 --- a/lib/transforms/aslp/aslp_state.ml +++ b/lib/transforms/aslp/aslp_state.ml @@ -80,7 +80,13 @@ let empty_block () = } let empty_aslp_state ~entry ~exit () = - let blocks = StringMap.singleton entry (empty_block ()) in + let blocks = + StringMap.of_list + [ + (entry, { (empty_block ()) with succs = [ exit ] }); + (exit, empty_block ()); + ] + in { blocks; entry; exit } (** Constructs a new empty {!lifter_state}. @@ -155,24 +161,29 @@ let ensure_pc_assigned key state = { state with blocks }) else state -(** Adds a new goto edge from [source] to [target]. If [source] was the exit - block, sets {!exit} to be [target]. *) -let add_goto aslp_state ~source ~target = +let modify_block aslp_state ~name ~f = let blocks = - StringMap.update source + StringMap.update name (function - | Some blk -> Some { blk with succs = target :: blk.succs } - | _ -> failwith "add_goto: source block not found") + | Some blk -> Some (f blk) + | _ -> failwith "modify_block: block not found") aslp_state.blocks in + { aslp_state with blocks } + +(** Adds a new goto edge from [source] to [target]. If [source] was the exit + block, sets {!exit} to be [target]. *) +let add_goto aslp_state ~source ~target = let exit = if String.equal source aslp_state.exit then target else aslp_state.exit in - { aslp_state with blocks; exit } + modify_block aslp_state ~name:source ~f:(fun b -> + { b with succs = target :: b.succs }) + |> fun s -> { s with exit } (** Creates a new block with the given name as a successor of the given [pred]. If [pred] was the exit block, sets {!exit} to be the new block. *) -let add_succ aslp_state ~pred ~name = +let add_block aslp_state ~pred ~name = let blocks = aslp_state.blocks |> StringMap.add name (empty_block ()) in { aslp_state with blocks } |> add_goto ~source:pred ~target:name diff --git a/lib/transforms/aslp/bincaml_ibi_make.ml b/lib/transforms/aslp/bincaml_ibi_make.ml index d04c1847..779e95bd 100644 --- a/lib/transforms/aslp/bincaml_ibi_make.ml +++ b/lib/transforms/aslp/bincaml_ibi_make.ml @@ -212,15 +212,24 @@ struct let active = !S.bincaml_lifter_state.active in let state = !S.bincaml_lifter_state.state in let block_id = !S.bincaml_lifter_state.generator.block_id in + let t = block_id () and f = block_id () and m = block_id () in + + let old_succs = ref [] in let state = state - |> Aslp_state.add_succ ~pred:active ~name:t - |> Aslp_state.add_succ ~pred:active ~name:f - |> Aslp_state.add_succ ~pred:t ~name:m + |> Aslp_state.modify_block ~name:active ~f:(fun b -> + old_succs := b.succs; + { b with succs = [] }) + |> Aslp_state.add_block ~pred:active ~name:t + |> Aslp_state.add_block ~pred:active ~name:f + |> Aslp_state.add_block ~pred:t ~name:m |> Aslp_state.add_goto ~source:f ~target:m + |> Aslp_state.modify_block ~name:m ~f:(fun b -> + { b with succs = !old_succs }) in S.bincaml_lifter_state := { !S.bincaml_lifter_state with state }; + let tbranch = { this = t; t; f; m } in (tbranch, { tbranch with this = f }, { tbranch with this = m }) @@ -236,8 +245,8 @@ struct let f_gen_bit_lit : bigint -> bitvector -> expr = fun _ bv -> Expr.BasilExpr.const (`Bitvector bv) - let f_gen_bool_lit : bool -> expr = fun _ -> failwith "f_gen_bool_lit" - let f_gen_int_lit : bigint -> expr = fun _ -> failwith "f_gen_int_lit" + let f_gen_bool_lit : bool -> expr = Expr.BasilExpr.boolconst + let f_gen_int_lit : bigint -> expr = Expr.BasilExpr.intconst let f_decl_bv : string -> bigint -> lexpr = fun name size -> bincaml_local_var name (Types.Bitvector (Z.to_int size)) diff --git a/test/transforms/test_aslp.ml b/test/transforms/test_aslp.ml index e8b547a1..77e7d37f 100644 --- a/test/transforms/test_aslp.ml +++ b/test/transforms/test_aslp.ml @@ -12,9 +12,12 @@ let%expect_test "lift empty" = [%expect {| { Aslp_state.blocks = "block_0" - -> { Aslp_state.assume = None; stmts = []; succs = ["block_1"] }, "block_1" - -> { Aslp_state.assume = None; stmts = []; succs = [] }; entry = "block_0"; - exit = "block_1" } + -> { Aslp_state.assume = None; stmts = []; succs = ["block_1"]; + has_pc_assign = false }, + "block_1" + -> { Aslp_state.assume = None; stmts = []; succs = []; + has_pc_assign = false }; + entry = "block_0"; exit = "block_1" } |}] let%expect_test "lift: add x1, x2, x3, lsl #4" = @@ -34,8 +37,10 @@ let%expect_test "lift: add x1, x2, x3, lsl #4" = stmts = [var var_0:bv64 := R2:bv64; var var_1:bv64 := R3:bv64; var R1:bv64 := bvadd(var_0:bv64, bvshl(var_1:bv64, 0x4:bv12))]; - succs = ["block_1"] }, - "block_1" -> { Aslp_state.assume = None; stmts = []; succs = [] }; + succs = ["block_1"]; has_pc_assign = false }, + "block_1" + -> { Aslp_state.assume = None; stmts = []; succs = []; + has_pc_assign = false }; entry = "block_0"; exit = "block_1" } |}] @@ -53,13 +58,17 @@ let%expect_test "lift 2x: mov x1, #0xabcd" = {| { Aslp_state.blocks = "block_0" -> { Aslp_state.assume = None; stmts = [var R1:bv64 := 0xabcd:bv64]; - succs = ["block_1"] }, - "block_1" -> { Aslp_state.assume = None; stmts = []; succs = ["block_2"] }, + succs = ["block_1"]; has_pc_assign = false }, + "block_1" + -> { Aslp_state.assume = None; stmts = []; succs = ["block_2"]; + has_pc_assign = false }, "block_2" -> { Aslp_state.assume = None; stmts = [var R1:bv64 := 0xabcd:bv64]; - succs = ["block_3"] }, - "block_3" -> { Aslp_state.assume = None; stmts = []; succs = [] }; - entry = "block_0"; exit = "block_3" } + succs = ["block_3"]; has_pc_assign = false }, + "block_3" + -> { Aslp_state.assume = None; stmts = []; succs = []; + has_pc_assign = false }; + entry = "block_0"; exit = "block_2" } |}] let%expect_test "lift: b.eq #1024" = @@ -72,21 +81,25 @@ let%expect_test "lift: b.eq #1024" = (Bitvec.of_string "0x54002000:bv32") in print_endline @@ Aslp_state.show_aslp_state x; - [%expect.unreachable] -[@@expect.uncaught_exn - {| - (* CR expect_test_collector: This test expectation appears to contain a backtrace. - This is strongly discouraged as backtraces are fragile. - Please change this test to not include a backtrace. *) - (Failure f_gen_branch) - Raised at Stdlib.failwith in file "stdlib.ml", line 29, characters 17-33 - Called from OfflineASL_pc__Aarch64_branch_conditional_cond.f_aarch64_branch_conditional_cond in file "lib/pc/aarch64_branch_conditional_cond.ml", line 42, characters 16-63 - Called from Transforms__Aslp.lift_opcode.(fun) in file "lib/transforms/aslp/aslp.ml", line 18, characters 6-67 - Called from Stdlib__Fun.protect in file "fun.ml", line 34, characters 8-15 - Re-raised at Stdlib__Fun.protect in file "fun.ml", line 39, characters 6-52 - Called from Test_aslp.(fun) in file "test/transforms/test_aslp.ml", lines 69-72, characters 4-42 - Called from Ppx_expect_runtime__Test_block.Configured.dump_backtrace in file "runtime/test_block.ml", line 142, characters 10-28 - |}] + [%expect {| + { Aslp_state.blocks = "block_0" + -> { Aslp_state.assume = None; stmts = []; succs = ["block_3"; "block_2"]; + has_pc_assign = false }, + "block_1" + -> { Aslp_state.assume = None; stmts = []; succs = []; + has_pc_assign = false }, + "block_2" + -> { Aslp_state.assume = None; + stmts = [var BranchTaken:bool := true; var PC:bv64 := 0x400:bv64]; + succs = ["block_4"]; has_pc_assign = false }, + "block_3" + -> { Aslp_state.assume = None; stmts = []; succs = ["block_4"]; + has_pc_assign = false }, + "block_4" + -> { Aslp_state.assume = None; stmts = []; succs = ["block_1"]; + has_pc_assign = false }; + entry = "block_0"; exit = "block_1" } + |}] let%expect_test "aslp integration basic" = let lst = From bc9430d6bf413ad9cad3f064b49c7dc6f229ef57 Mon Sep 17 00:00:00 2001 From: rina Date: Mon, 22 Jun 2026 18:16:36 +1000 Subject: [PATCH 09/28] propagate has_pc_assign --- lib/transforms/aslp/aslp_state.ml | 72 ++++++++++++++----------- lib/transforms/aslp/bincaml_ibi_make.ml | 28 +++++++--- test/transforms/test_aslp.ml | 9 ++-- 3 files changed, 67 insertions(+), 42 deletions(-) diff --git a/lib/transforms/aslp/aslp_state.ml b/lib/transforms/aslp/aslp_state.ml index d176e605..3de9bd40 100644 --- a/lib/transforms/aslp/aslp_state.ml +++ b/lib/transforms/aslp/aslp_state.ml @@ -126,50 +126,60 @@ let aslp_ids_from_generators ~block_ids ~local_ids = (** {1 State manipulation functions} *) -let add_stmt_to_block state key stmt = +let get_block aslp_state ~name = + StringMap.find_opt name aslp_state.blocks + |> Option.get_exn_or "get_block: block not found" + +let modify_block aslp_state ~name ~f = let blocks = - StringMap.update key + StringMap.update name (function - | Some blk -> - CCVector.push blk.stmts stmt; - Some blk - | None -> failwith "add_stmt: block not found") - state.blocks + | Some blk -> Some (f blk) + | _ -> failwith "modify_block: block not found") + aslp_state.blocks + in + { aslp_state with blocks } + +let add_stmt_to_block blk ~stmt = + let has_pc_assign = + match stmt with + | Stmt.Instr_Assign _ -> + Stmt.iter_assigned stmt |> Iter.mem ~eq:Var.equal Aslp_lexpr.(to_var PC) + | _ -> false in - { state with blocks } + + CCVector.push blk.stmts stmt; + if has_pc_assign then { blk with has_pc_assign } else blk let add_stmt_to_active stmt (lifter_state : lifter_state) = - let state = add_stmt_to_block lifter_state.state lifter_state.active stmt in + let state = + lifter_state.state + |> modify_block ~name:lifter_state.active ~f:(add_stmt_to_block ~stmt) + in { lifter_state with state } (** Ensures that the given block ID has a PC assignment on all paths. If it already {!has_pc_assign}, no changes are made. *) -let ensure_pc_assigned key state = +let ensure_pc_assigned ~name state = let blocks = state.blocks in let block = - StringMap.find_opt key blocks + StringMap.find_opt name blocks |> Option.get_exn_or "ensure_pc_assigned: block not found" in - if not block.has_pc_assign then ( - let pc = Aslp_lexpr.to_var PC in - let incremented = - Expr.BasilExpr.(applyintrin ~op:`BVADD [ rvar pc; bv_of_int ~size:32 4 ]) - in - CCVector.push block.stmts - (Stmt.Instr_Assign { attrib = Attrib.empty; al = [ (pc, incremented) ] }); - let blocks = StringMap.add key { block with has_pc_assign = true } blocks in - { state with blocks }) - else state - -let modify_block aslp_state ~name ~f = - let blocks = - StringMap.update name - (function - | Some blk -> Some (f blk) - | _ -> failwith "modify_block: block not found") - aslp_state.blocks - in - { aslp_state with blocks } + state + |> modify_block ~name ~f:(function + | { has_pc_assign = false } -> + let pc = Aslp_lexpr.to_var PC in + let incremented = + Expr.BasilExpr.( + applyintrin ~op:`BVADD [ rvar pc; bv_of_int ~size:32 4 ]) + in + let stmt = + Stmt.Instr_Assign + { attrib = Attrib.empty; al = [ (pc, incremented) ] } + in + block |> add_stmt_to_block ~stmt + | block -> block) (** Adds a new goto edge from [source] to [target]. If [source] was the exit block, sets {!exit} to be [target]. *) diff --git a/lib/transforms/aslp/bincaml_ibi_make.ml b/lib/transforms/aslp/bincaml_ibi_make.ml index 779e95bd..de8102ac 100644 --- a/lib/transforms/aslp/bincaml_ibi_make.ml +++ b/lib/transforms/aslp/bincaml_ibi_make.ml @@ -44,16 +44,19 @@ struct (from the beginning of the instruction). *) let bincaml_ensure_pc_consistency left right = let state = !S.bincaml_lifter_state.state in - if - (StringMap.find left state.blocks).has_pc_assign - || (StringMap.find right state.blocks).has_pc_assign - then + let has_any_pc_assign = + (Aslp_state.get_block state ~name:left).has_pc_assign + || (Aslp_state.get_block state ~name:right).has_pc_assign + in + if has_any_pc_assign then begin let state = state - |> Aslp_state.ensure_pc_assigned left - |> Aslp_state.ensure_pc_assigned right + |> Aslp_state.ensure_pc_assigned ~name:left + |> Aslp_state.ensure_pc_assigned ~name:right in S.bincaml_lifter_state := { !S.bincaml_lifter_state with state } + end; + has_any_pc_assign (** {2 Instruction building interface implementation} *) @@ -235,7 +238,18 @@ struct let f_switch_context : branch -> unit = fun b -> - S.bincaml_lifter_state := { !S.bincaml_lifter_state with active = b.this } + let state = + if String.equal b.this b.m then begin + let has_pc_assign = bincaml_ensure_pc_consistency b.t b.f in + + !S.bincaml_lifter_state.state + |> Aslp_state.modify_block ~name:b.this ~f:(fun merge -> + { merge with has_pc_assign }) + end + else !S.bincaml_lifter_state.state + in + S.bincaml_lifter_state := + { !S.bincaml_lifter_state with active = b.this; state } let f_true_branch : branch * branch * branch -> branch = fun (t, f, m) -> t let f_false_branch : branch * branch * branch -> branch = fun (t, f, m) -> f diff --git a/test/transforms/test_aslp.ml b/test/transforms/test_aslp.ml index 77e7d37f..9262fb02 100644 --- a/test/transforms/test_aslp.ml +++ b/test/transforms/test_aslp.ml @@ -91,13 +91,14 @@ let%expect_test "lift: b.eq #1024" = "block_2" -> { Aslp_state.assume = None; stmts = [var BranchTaken:bool := true; var PC:bv64 := 0x400:bv64]; - succs = ["block_4"]; has_pc_assign = false }, + succs = ["block_4"]; has_pc_assign = true }, "block_3" - -> { Aslp_state.assume = None; stmts = []; succs = ["block_4"]; - has_pc_assign = false }, + -> { Aslp_state.assume = None; + stmts = [var PC:bv64 := bvadd(PC:bv64, 0x4:bv32)]; + succs = ["block_4"]; has_pc_assign = true }, "block_4" -> { Aslp_state.assume = None; stmts = []; succs = ["block_1"]; - has_pc_assign = false }; + has_pc_assign = true }; entry = "block_0"; exit = "block_1" } |}] From 37d6e654c86f56e87550835cbb292b51bbd1b035 Mon Sep 17 00:00:00 2001 From: rina Date: Mon, 22 Jun 2026 18:24:15 +1000 Subject: [PATCH 10/28] shared entry+exit --- lib/analysis/dataflow_graph.ml | 2 +- lib/transforms/aslp/aslp_state.ml | 24 +++++++--------- lib/transforms/aslp/bincaml_ibi_make.ml | 1 + test/transforms/test_aslp.ml | 38 ++++++++----------------- 4 files changed, 25 insertions(+), 40 deletions(-) diff --git a/lib/analysis/dataflow_graph.ml b/lib/analysis/dataflow_graph.ml index 6c46bd48..47cc46f4 100644 --- a/lib/analysis/dataflow_graph.ml +++ b/lib/analysis/dataflow_graph.ml @@ -216,7 +216,7 @@ module SimpleSolver = struct module WL = Worklist.Make (Vertex) let deps ~assume_ssi (dir : [ `Backwards | `Forwards ]) p lookup v = - (* this is hacky to support ssi without proper sigma nodes in the IR; we should add them to block + (* this is hacky to support ssi without proper sigma nodes in the IR; we should add them to block type probably *) let all_deps = let defines (v : Vertex.t) = diff --git a/lib/transforms/aslp/aslp_state.ml b/lib/transforms/aslp/aslp_state.ml index 3de9bd40..ffe3c801 100644 --- a/lib/transforms/aslp/aslp_state.ml +++ b/lib/transforms/aslp/aslp_state.ml @@ -18,8 +18,8 @@ type aslp_block = { [@printer Format.map CCVector.to_list pp_stmt_list_for_printing] succs : string list; has_pc_assign : bool; - (** Whether this block (or any of its same-instruction predecessors) have - explicitly assigned to the [PC] variable. *) + (** Whether, upon reaching the end of this block, it is guaranteed that + [PC] will have been assigned to on all control-flow paths. *) } [@@deriving show] (** An ASLp lifter block is a list of statements followed by a non-deterministic @@ -81,13 +81,9 @@ let empty_block () = let empty_aslp_state ~entry ~exit () = let blocks = - StringMap.of_list - [ - (entry, { (empty_block ()) with succs = [ exit ] }); - (exit, empty_block ()); - ] + StringMap.of_list [ (entry, { (empty_block ()) with succs = [] }) ] in - { blocks; entry; exit } + { blocks; entry; exit = entry } (** Constructs a new empty {!lifter_state}. @@ -169,16 +165,18 @@ let ensure_pc_assigned ~name state = state |> modify_block ~name ~f:(function | { has_pc_assign = false } -> - let pc = Aslp_lexpr.to_var PC in + let pc = Aslp_lexpr.to_var PC + and branchtaken = Aslp_lexpr.to_var BranchTaken in let incremented = Expr.BasilExpr.( applyintrin ~op:`BVADD [ rvar pc; bv_of_int ~size:32 4 ]) in - let stmt = - Stmt.Instr_Assign - { attrib = Attrib.empty; al = [ (pc, incremented) ] } + let al = + [ (pc, incremented); (branchtaken, Expr.BasilExpr.boolconst false) ] in - block |> add_stmt_to_block ~stmt + block + |> add_stmt_to_block + ~stmt:(Stmt.Instr_Assign { attrib = Attrib.empty; al }) | block -> block) (** Adds a new goto edge from [source] to [target]. If [source] was the exit diff --git a/lib/transforms/aslp/bincaml_ibi_make.ml b/lib/transforms/aslp/bincaml_ibi_make.ml index de8102ac..15d7cb1b 100644 --- a/lib/transforms/aslp/bincaml_ibi_make.ml +++ b/lib/transforms/aslp/bincaml_ibi_make.ml @@ -242,6 +242,7 @@ struct if String.equal b.this b.m then begin let has_pc_assign = bincaml_ensure_pc_consistency b.t b.f in + (* mindful that state access is after bincaml_ensure_pc_consistency *) !S.bincaml_lifter_state.state |> Aslp_state.modify_block ~name:b.this ~f:(fun merge -> { merge with has_pc_assign }) diff --git a/test/transforms/test_aslp.ml b/test/transforms/test_aslp.ml index 9262fb02..0cd37a1d 100644 --- a/test/transforms/test_aslp.ml +++ b/test/transforms/test_aslp.ml @@ -12,12 +12,9 @@ let%expect_test "lift empty" = [%expect {| { Aslp_state.blocks = "block_0" - -> { Aslp_state.assume = None; stmts = []; succs = ["block_1"]; - has_pc_assign = false }, - "block_1" -> { Aslp_state.assume = None; stmts = []; succs = []; has_pc_assign = false }; - entry = "block_0"; exit = "block_1" } + entry = "block_0"; exit = "block_0" } |}] let%expect_test "lift: add x1, x2, x3, lsl #4" = @@ -37,11 +34,8 @@ let%expect_test "lift: add x1, x2, x3, lsl #4" = stmts = [var var_0:bv64 := R2:bv64; var var_1:bv64 := R3:bv64; var R1:bv64 := bvadd(var_0:bv64, bvshl(var_1:bv64, 0x4:bv12))]; - succs = ["block_1"]; has_pc_assign = false }, - "block_1" - -> { Aslp_state.assume = None; stmts = []; succs = []; - has_pc_assign = false }; - entry = "block_0"; exit = "block_1" } + succs = []; has_pc_assign = false }; + entry = "block_0"; exit = "block_0" } |}] let%expect_test "lift 2x: mov x1, #0xabcd" = @@ -54,20 +48,13 @@ let%expect_test "lift 2x: mov x1, #0xabcd" = (Bitvec.of_string "0xd29579a1:bv32") in print_endline @@ Aslp_state.show_aslp_state x; - [%expect - {| + [%expect {| { Aslp_state.blocks = "block_0" -> { Aslp_state.assume = None; stmts = [var R1:bv64 := 0xabcd:bv64]; - succs = ["block_1"]; has_pc_assign = false }, - "block_1" - -> { Aslp_state.assume = None; stmts = []; succs = ["block_2"]; - has_pc_assign = false }, + succs = ["block_2"]; has_pc_assign = false }, "block_2" -> { Aslp_state.assume = None; stmts = [var R1:bv64 := 0xabcd:bv64]; - succs = ["block_3"]; has_pc_assign = false }, - "block_3" - -> { Aslp_state.assume = None; stmts = []; succs = []; - has_pc_assign = false }; + succs = []; has_pc_assign = false }; entry = "block_0"; exit = "block_2" } |}] @@ -85,21 +72,20 @@ let%expect_test "lift: b.eq #1024" = { Aslp_state.blocks = "block_0" -> { Aslp_state.assume = None; stmts = []; succs = ["block_3"; "block_2"]; has_pc_assign = false }, - "block_1" - -> { Aslp_state.assume = None; stmts = []; succs = []; - has_pc_assign = false }, "block_2" -> { Aslp_state.assume = None; stmts = [var BranchTaken:bool := true; var PC:bv64 := 0x400:bv64]; succs = ["block_4"]; has_pc_assign = true }, "block_3" -> { Aslp_state.assume = None; - stmts = [var PC:bv64 := bvadd(PC:bv64, 0x4:bv32)]; + stmts = + [(var PC:bv64 := bvadd(PC:bv64, 0x4:bv32), var BranchTaken:bool := false) + ]; succs = ["block_4"]; has_pc_assign = true }, "block_4" - -> { Aslp_state.assume = None; stmts = []; succs = ["block_1"]; - has_pc_assign = true }; - entry = "block_0"; exit = "block_1" } + -> { Aslp_state.assume = None; stmts = []; succs = []; has_pc_assign = true + }; + entry = "block_0"; exit = "block_4" } |}] let%expect_test "aslp integration basic" = From 1b5253e10cc193bc83772ccc2736473b36ad5c8e Mon Sep 17 00:00:00 2001 From: rina Date: Mon, 22 Jun 2026 18:32:29 +1000 Subject: [PATCH 11/28] globalvar --- lib/transforms/aslp/aslp_state.ml | 13 +++---------- lib/transforms/aslp/bincaml_ibi_make.ml | 4 +++- test/transforms/test_aslp.ml | 14 ++++++-------- 3 files changed, 12 insertions(+), 19 deletions(-) diff --git a/lib/transforms/aslp/aslp_state.ml b/lib/transforms/aslp/aslp_state.ml index ffe3c801..34153bfd 100644 --- a/lib/transforms/aslp/aslp_state.ml +++ b/lib/transforms/aslp/aslp_state.ml @@ -157,23 +157,16 @@ let add_stmt_to_active stmt (lifter_state : lifter_state) = (** Ensures that the given block ID has a PC assignment on all paths. If it already {!has_pc_assign}, no changes are made. *) let ensure_pc_assigned ~name state = - let blocks = state.blocks in - let block = - StringMap.find_opt name blocks - |> Option.get_exn_or "ensure_pc_assigned: block not found" - in state |> modify_block ~name ~f:(function - | { has_pc_assign = false } -> + | { has_pc_assign = false } as block -> let pc = Aslp_lexpr.to_var PC and branchtaken = Aslp_lexpr.to_var BranchTaken in let incremented = Expr.BasilExpr.( applyintrin ~op:`BVADD [ rvar pc; bv_of_int ~size:32 4 ]) - in - let al = - [ (pc, incremented); (branchtaken, Expr.BasilExpr.boolconst false) ] - in + and boolfalse = Expr.BasilExpr.boolconst false in + let al = [ (pc, incremented); (branchtaken, boolfalse) ] in block |> add_stmt_to_block ~stmt:(Stmt.Instr_Assign { attrib = Attrib.empty; al }) diff --git a/lib/transforms/aslp/bincaml_ibi_make.ml b/lib/transforms/aslp/bincaml_ibi_make.ml index 15d7cb1b..7da65d50 100644 --- a/lib/transforms/aslp/bincaml_ibi_make.ml +++ b/lib/transforms/aslp/bincaml_ibi_make.ml @@ -255,7 +255,9 @@ struct let f_true_branch : branch * branch * branch -> branch = fun (t, f, m) -> t let f_false_branch : branch * branch * branch -> branch = fun (t, f, m) -> f let f_merge_branch : branch * branch * branch -> branch = fun (t, f, m) -> m - let f_gen_assert : expr -> unit = fun _ -> failwith "f_gen_assert" + + let f_gen_assert : expr -> unit = + fun e -> bincaml_emit (Stmt.Instr_Assert { attrib = Attrib.empty; body = e }) let f_gen_bit_lit : bigint -> bitvector -> expr = fun _ bv -> Expr.BasilExpr.const (`Bitvector bv) diff --git a/test/transforms/test_aslp.ml b/test/transforms/test_aslp.ml index 0cd37a1d..05493525 100644 --- a/test/transforms/test_aslp.ml +++ b/test/transforms/test_aslp.ml @@ -32,8 +32,8 @@ let%expect_test "lift: add x1, x2, x3, lsl #4" = { Aslp_state.blocks = "block_0" -> { Aslp_state.assume = None; stmts = - [var var_0:bv64 := R2:bv64; var var_1:bv64 := R3:bv64; - var R1:bv64 := bvadd(var_0:bv64, bvshl(var_1:bv64, 0x4:bv12))]; + [var var_0:bv64 := R2; var var_1:bv64 := R3; + R1:bv64 := bvadd(var_0:bv64, bvshl(var_1:bv64, 0x4:bv12))]; succs = []; has_pc_assign = false }; entry = "block_0"; exit = "block_0" } |}] @@ -50,10 +50,10 @@ let%expect_test "lift 2x: mov x1, #0xabcd" = print_endline @@ Aslp_state.show_aslp_state x; [%expect {| { Aslp_state.blocks = "block_0" - -> { Aslp_state.assume = None; stmts = [var R1:bv64 := 0xabcd:bv64]; + -> { Aslp_state.assume = None; stmts = [R1:bv64 := 0xabcd:bv64]; succs = ["block_2"]; has_pc_assign = false }, "block_2" - -> { Aslp_state.assume = None; stmts = [var R1:bv64 := 0xabcd:bv64]; + -> { Aslp_state.assume = None; stmts = [R1:bv64 := 0xabcd:bv64]; succs = []; has_pc_assign = false }; entry = "block_0"; exit = "block_2" } |}] @@ -74,13 +74,11 @@ let%expect_test "lift: b.eq #1024" = has_pc_assign = false }, "block_2" -> { Aslp_state.assume = None; - stmts = [var BranchTaken:bool := true; var PC:bv64 := 0x400:bv64]; + stmts = [BranchTaken:bool := true; PC:bv64 := 0x400:bv64]; succs = ["block_4"]; has_pc_assign = true }, "block_3" -> { Aslp_state.assume = None; - stmts = - [(var PC:bv64 := bvadd(PC:bv64, 0x4:bv32), var BranchTaken:bool := false) - ]; + stmts = [(PC:bv64 := bvadd(PC, 0x4:bv32), BranchTaken:bool := false)]; succs = ["block_4"]; has_pc_assign = true }, "block_4" -> { Aslp_state.assume = None; stmts = []; succs = []; has_pc_assign = true From 4dfd54b97a535d3940a59bbbebcdcbc57233d7dc Mon Sep 17 00:00:00 2001 From: rina Date: Mon, 22 Jun 2026 18:36:38 +1000 Subject: [PATCH 12/28] fix scopes --- test/transforms/test_aslp.ml | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/test/transforms/test_aslp.ml b/test/transforms/test_aslp.ml index 05493525..a2380743 100644 --- a/test/transforms/test_aslp.ml +++ b/test/transforms/test_aslp.ml @@ -32,8 +32,8 @@ let%expect_test "lift: add x1, x2, x3, lsl #4" = { Aslp_state.blocks = "block_0" -> { Aslp_state.assume = None; stmts = - [var var_0:bv64 := R2; var var_1:bv64 := R3; - R1:bv64 := bvadd(var_0:bv64, bvshl(var_1:bv64, 0x4:bv12))]; + [var var_0:bv64 := $R2; var var_1:bv64 := $R3; + $R1:bv64 := bvadd(var_0:bv64, bvshl(var_1:bv64, 0x4:bv12))]; succs = []; has_pc_assign = false }; entry = "block_0"; exit = "block_0" } |}] @@ -50,10 +50,10 @@ let%expect_test "lift 2x: mov x1, #0xabcd" = print_endline @@ Aslp_state.show_aslp_state x; [%expect {| { Aslp_state.blocks = "block_0" - -> { Aslp_state.assume = None; stmts = [R1:bv64 := 0xabcd:bv64]; + -> { Aslp_state.assume = None; stmts = [$R1:bv64 := 0xabcd:bv64]; succs = ["block_2"]; has_pc_assign = false }, "block_2" - -> { Aslp_state.assume = None; stmts = [R1:bv64 := 0xabcd:bv64]; + -> { Aslp_state.assume = None; stmts = [$R1:bv64 := 0xabcd:bv64]; succs = []; has_pc_assign = false }; entry = "block_0"; exit = "block_2" } |}] @@ -74,11 +74,12 @@ let%expect_test "lift: b.eq #1024" = has_pc_assign = false }, "block_2" -> { Aslp_state.assume = None; - stmts = [BranchTaken:bool := true; PC:bv64 := 0x400:bv64]; + stmts = [$BranchTaken:bool := true; $PC:bv64 := 0x400:bv64]; succs = ["block_4"]; has_pc_assign = true }, "block_3" -> { Aslp_state.assume = None; - stmts = [(PC:bv64 := bvadd(PC, 0x4:bv32), BranchTaken:bool := false)]; + stmts = + [($PC:bv64 := bvadd($PC, 0x4:bv32), $BranchTaken:bool := false)]; succs = ["block_4"]; has_pc_assign = true }, "block_4" -> { Aslp_state.assume = None; stmts = []; succs = []; has_pc_assign = true From b96f50abf76dfa45783194263e467ac5c23f3eaa Mon Sep 17 00:00:00 2001 From: rina Date: Mon, 22 Jun 2026 18:36:53 +1000 Subject: [PATCH 13/28] add aslp_lexpr lol --- lib/transforms/aslp/aslp_lexpr.ml | 74 +++++++++++++++++++++++++++++++ 1 file changed, 74 insertions(+) create mode 100644 lib/transforms/aslp/aslp_lexpr.ml diff --git a/lib/transforms/aslp/aslp_lexpr.ml b/lib/transforms/aslp/aslp_lexpr.ml new file mode 100644 index 00000000..301553aa --- /dev/null +++ b/lib/transforms/aslp/aslp_lexpr.ml @@ -0,0 +1,74 @@ +open Lang +open Common + +(** A variable used by the ASLp lifter. Can be converted to a Bincaml {!Var.t} +*) +type t = + | Local of string * Types.t + | PC + | R of int option + (** A 64-bit ordinary register. [None] is used to represent the virtual + "array" of registers, since ASLp uses [f_gen_array_load] to access + registers. *) + | Z of int option (** A 128-bit scalar register. *) + | SP_EL0 + | FPSR + | FPCR + | PSTATE_C + | PSTATE_Z + | PSTATE_V + | PSTATE_N + | PSTATE_A + | PSTATE_D + | PSTATE_DIT + | PSTATE_F + | PSTATE_I + | PSTATE_PAN + | PSTATE_SP + | PSTATE_SSBS + | PSTATE_TCO + | PSTATE_UAO + | PSTATE_BTYPE + | BTypeCompatible + | BranchTaken + | BTypeNext + | ExclusiveLocal +[@@deriving show { with_path = false }] + +let typ (x : t) = + let bv = Types.bv in + match x with + | Local (_, t) -> t + | R (Some _) -> bv 64 + | Z (Some _) -> bv 128 + | PC -> bv 64 + | SP_EL0 -> bv 64 + | FPSR -> bv 64 + | FPCR -> bv 64 + | PSTATE_C | PSTATE_Z | PSTATE_V | PSTATE_N | PSTATE_A | PSTATE_I | PSTATE_F + | PSTATE_D | PSTATE_DIT | PSTATE_PAN | PSTATE_SP | PSTATE_SSBS | PSTATE_TCO + | PSTATE_UAO | PSTATE_BTYPE -> + bv 1 + | BTypeCompatible -> Types.Boolean + | BranchTaken -> Types.Boolean + | BTypeNext -> bv 2 + | ExclusiveLocal -> Types.Boolean + | R None | Z None -> failwith "typeof_lexpr: array lexpr has no bincaml type" + +let name = function + | Local (v, _) -> v + | R (Some n) -> Printf.sprintf "R%d" n + | Z (Some n) -> Printf.sprintf "Z%d" n + | R None | Z None -> failwith "name_of_lexpr: array lexpr has no bincaml name" + | v -> show v + +let scope = function + | Local _ -> Var.LocalVar + | BranchTaken | BTypeCompatible | BTypeNext -> LocalVar + | _ -> GlobalVar + +let to_var x = + let ty = typ x and name = name x in + let scope = scope x in + let name = match scope with GlobalVar -> "$" ^ name | _ -> name in + Var.create ~scope name ty From 6ddb5261cb6acbf02463a6ee1a34fa480a2e5dea Mon Sep 17 00:00:00 2001 From: rina Date: Mon, 22 Jun 2026 18:38:34 +1000 Subject: [PATCH 14/28] rename aslp_block to aslp_diamond --- lib/transforms/aslp/aslp_state.ml | 9 +++++---- lib/transforms/aslp/bincaml_ibi.ml | 2 +- lib/transforms/aslp/bincaml_ibi_make.ml | 8 ++++---- test/transforms/test_aslp.ml | 14 ++++++++------ 4 files changed, 18 insertions(+), 15 deletions(-) diff --git a/lib/transforms/aslp/aslp_state.ml b/lib/transforms/aslp/aslp_state.ml index 34153bfd..3ef96f42 100644 --- a/lib/transforms/aslp/aslp_state.ml +++ b/lib/transforms/aslp/aslp_state.ml @@ -26,7 +26,7 @@ type aslp_block = { goto to a number of successors. Each block is optionally guarded by an assume statement. *) -type aslp_state = { +type aslp_diamond = { blocks : aslp_block StringMap.t; [@printer StringMap.pp CCString.pp pp_aslp_block] entry : string; @@ -53,7 +53,8 @@ type aslp_ids = { block_id : unit -> string; local_id : unit -> string } type lifter_state = { active : string; (** Active block where new runtime statements will be appended. *) - state : aslp_state; (** Lifter state representing a control flow diamond. *) + state : aslp_diamond; + (** Lifter state representing a control flow diamond. *) generator : aslp_ids; [@opaque] (** Generators for ID names. *) names : (string, string) Hashtbl.t; (** Map of ASLp local variable names to the "ID-ified" names produced for @@ -200,7 +201,7 @@ let append_aslp_states first second = let show_aslp_block = show_aslp_block let pp_aslp_block = pp_aslp_block -let show_aslp_state = show_aslp_state -let pp_aslp_state = pp_aslp_state +let show_aslp_diamond = show_aslp_diamond +let pp_aslp_diamond = pp_aslp_diamond let show_lifter_state = show_lifter_state let pp_lifter_state = pp_lifter_state diff --git a/lib/transforms/aslp/bincaml_ibi.ml b/lib/transforms/aslp/bincaml_ibi.ml index bf7e2ba5..0d69c5cc 100644 --- a/lib/transforms/aslp/bincaml_ibi.ml +++ b/lib/transforms/aslp/bincaml_ibi.ml @@ -11,7 +11,7 @@ module type IBI = sig include OfflineASL_pc.Instruction_building_interface.IBI with type bitvector = Lang.Common.Bitvec.t - and type ast = Aslp_state.aslp_state + and type ast = Aslp_state.aslp_diamond end (** Builds a new {!IBI} with the given initial generator state. *) diff --git a/lib/transforms/aslp/bincaml_ibi_make.ml b/lib/transforms/aslp/bincaml_ibi_make.ml index 7da65d50..d58911b1 100644 --- a/lib/transforms/aslp/bincaml_ibi_make.ml +++ b/lib/transforms/aslp/bincaml_ibi_make.ml @@ -14,7 +14,7 @@ struct type lexpr = Aslp_lexpr.t type stmt = Aslp_state.stmt type branch = { this : string; t : string; f : string; m : string } - type ast = Aslp_state.aslp_state + type ast = Aslp_state.aslp_diamond (** {2 Bincaml-specific utility functions} *) @@ -35,9 +35,9 @@ struct in Aslp_lexpr.Local (id_name, ty) - (** Ensures that the two given {!aslp_block} keys agree on their - {!has_pc_assign} property, before moving to a control-flow join of the two - blocks. + (** Ensures that the two given block names agree on their {!has_pc_assign} + property. This function mutates the state, and this function should be + called before moving to a control-flow join of the two blocks This is used to maintain the invariant that at every control flow point, the [PC] variable is either assigned on all paths or assigned on no paths diff --git a/test/transforms/test_aslp.ml b/test/transforms/test_aslp.ml index a2380743..27be32f8 100644 --- a/test/transforms/test_aslp.ml +++ b/test/transforms/test_aslp.ml @@ -8,7 +8,7 @@ let%expect_test "lift empty" = let x = lift_code_block (module I) ~address:(Bitvec.zero ~size:64) @@ Iter.empty in - print_endline @@ Aslp_state.show_aslp_state x; + print_endline @@ Aslp_state.show_aslp_diamond x; [%expect {| { Aslp_state.blocks = "block_0" @@ -26,7 +26,7 @@ let%expect_test "lift: add x1, x2, x3, lsl #4" = ~address:(Bitvec.zero ~size:64) (Bitvec.of_string "0x8b031041:bv32") in - print_endline @@ Aslp_state.show_aslp_state x; + print_endline @@ Aslp_state.show_aslp_diamond x; [%expect {| { Aslp_state.blocks = "block_0" @@ -47,8 +47,9 @@ let%expect_test "lift 2x: mov x1, #0xabcd" = (Bitvec.of_string "0xd29579a1:bv32") (Bitvec.of_string "0xd29579a1:bv32") in - print_endline @@ Aslp_state.show_aslp_state x; - [%expect {| + print_endline @@ Aslp_state.show_aslp_diamond x; + [%expect + {| { Aslp_state.blocks = "block_0" -> { Aslp_state.assume = None; stmts = [$R1:bv64 := 0xabcd:bv64]; succs = ["block_2"]; has_pc_assign = false }, @@ -67,8 +68,9 @@ let%expect_test "lift: b.eq #1024" = ~address:(Bitvec.zero ~size:64) (Bitvec.of_string "0x54002000:bv32") in - print_endline @@ Aslp_state.show_aslp_state x; - [%expect {| + print_endline @@ Aslp_state.show_aslp_diamond x; + [%expect + {| { Aslp_state.blocks = "block_0" -> { Aslp_state.assume = None; stmts = []; succs = ["block_3"; "block_2"]; has_pc_assign = false }, From e343d08efad5717faf73327ab6e31b6f5d0954be Mon Sep 17 00:00:00 2001 From: rina Date: Mon, 22 Jun 2026 18:43:46 +1000 Subject: [PATCH 15/28] rename to diamond --- lib/transforms/aslp/aslp_state.ml | 10 ++++----- lib/transforms/aslp/bincaml_ibi.ml | 2 +- lib/transforms/aslp/bincaml_ibi_make.ml | 29 +++++++++++++------------ test/transforms/test_aslp.ml | 4 ++-- 4 files changed, 23 insertions(+), 22 deletions(-) diff --git a/lib/transforms/aslp/aslp_state.ml b/lib/transforms/aslp/aslp_state.ml index 3ef96f42..82cd54f4 100644 --- a/lib/transforms/aslp/aslp_state.ml +++ b/lib/transforms/aslp/aslp_state.ml @@ -53,7 +53,7 @@ type aslp_ids = { block_id : unit -> string; local_id : unit -> string } type lifter_state = { active : string; (** Active block where new runtime statements will be appended. *) - state : aslp_diamond; + diamond : aslp_diamond; (** Lifter state representing a control flow diamond. *) generator : aslp_ids; [@opaque] (** Generators for ID names. *) names : (string, string) Hashtbl.t; @@ -95,7 +95,7 @@ let empty_lifter_state ~generator () = let exit = generator.block_id () in { active = entry; - state = empty_aslp_state ~entry ~exit (); + diamond = empty_aslp_state ~entry ~exit (); names = Hashtbl.create 16; generator; } @@ -149,11 +149,11 @@ let add_stmt_to_block blk ~stmt = if has_pc_assign then { blk with has_pc_assign } else blk let add_stmt_to_active stmt (lifter_state : lifter_state) = - let state = - lifter_state.state + let diamond = + lifter_state.diamond |> modify_block ~name:lifter_state.active ~f:(add_stmt_to_block ~stmt) in - { lifter_state with state } + { lifter_state with diamond } (** Ensures that the given block ID has a PC assignment on all paths. If it already {!has_pc_assign}, no changes are made. *) diff --git a/lib/transforms/aslp/bincaml_ibi.ml b/lib/transforms/aslp/bincaml_ibi.ml index 0d69c5cc..4de64dae 100644 --- a/lib/transforms/aslp/bincaml_ibi.ml +++ b/lib/transforms/aslp/bincaml_ibi.ml @@ -5,7 +5,7 @@ include Bincaml_ibi_make (** @inline *) (** Abstract Bincaml IBI signature. Defines the input type as - {!Lang.Common.Bitvec.t} and the output type as {!Aslp_state.aslp_state} but + {!Lang.Common.Bitvec.t} and the output type as {!Aslp_state.aslp_diamond} but leaving other types opaque. *) module type IBI = sig include diff --git a/lib/transforms/aslp/bincaml_ibi_make.ml b/lib/transforms/aslp/bincaml_ibi_make.ml index d58911b1..9fcd0f28 100644 --- a/lib/transforms/aslp/bincaml_ibi_make.ml +++ b/lib/transforms/aslp/bincaml_ibi_make.ml @@ -35,26 +35,27 @@ struct in Aslp_lexpr.Local (id_name, ty) - (** Ensures that the two given block names agree on their {!has_pc_assign} - property. This function mutates the state, and this function should be - called before moving to a control-flow join of the two blocks + (** Ensures that the two given block names agree on their + {!Aslp_state.has_pc_assign} property. This function mutates the state, and + this function should be called before moving to a control-flow join of the + two blocks This is used to maintain the invariant that at every control flow point, the [PC] variable is either assigned on all paths or assigned on no paths (from the beginning of the instruction). *) let bincaml_ensure_pc_consistency left right = - let state = !S.bincaml_lifter_state.state in + let state = !S.bincaml_lifter_state.diamond in let has_any_pc_assign = (Aslp_state.get_block state ~name:left).has_pc_assign || (Aslp_state.get_block state ~name:right).has_pc_assign in if has_any_pc_assign then begin - let state = + let diamond = state |> Aslp_state.ensure_pc_assigned ~name:left |> Aslp_state.ensure_pc_assigned ~name:right in - S.bincaml_lifter_state := { !S.bincaml_lifter_state with state } + S.bincaml_lifter_state := { !S.bincaml_lifter_state with diamond } end; has_any_pc_assign @@ -64,7 +65,7 @@ struct let generator = !S.bincaml_lifter_state.generator in S.bincaml_lifter_state := Aslp_state.empty_lifter_state ~generator () - let get_ir () = !S.bincaml_lifter_state.state + let get_ir () = !S.bincaml_lifter_state.diamond let bigint_of_string : string -> bigint = Z.of_string_base 10 let bigint_of_int : int -> bigint = Z.of_int let bigint_zero : bigint = Z.zero @@ -213,13 +214,13 @@ struct let f_gen_branch : expr -> branch * branch * branch = fun cond -> let active = !S.bincaml_lifter_state.active in - let state = !S.bincaml_lifter_state.state in + let state = !S.bincaml_lifter_state.diamond in let block_id = !S.bincaml_lifter_state.generator.block_id in let t = block_id () and f = block_id () and m = block_id () in let old_succs = ref [] in - let state = + let diamond = state |> Aslp_state.modify_block ~name:active ~f:(fun b -> old_succs := b.succs; @@ -231,26 +232,26 @@ struct |> Aslp_state.modify_block ~name:m ~f:(fun b -> { b with succs = !old_succs }) in - S.bincaml_lifter_state := { !S.bincaml_lifter_state with state }; + S.bincaml_lifter_state := { !S.bincaml_lifter_state with diamond }; let tbranch = { this = t; t; f; m } in (tbranch, { tbranch with this = f }, { tbranch with this = m }) let f_switch_context : branch -> unit = fun b -> - let state = + let diamond = if String.equal b.this b.m then begin let has_pc_assign = bincaml_ensure_pc_consistency b.t b.f in (* mindful that state access is after bincaml_ensure_pc_consistency *) - !S.bincaml_lifter_state.state + !S.bincaml_lifter_state.diamond |> Aslp_state.modify_block ~name:b.this ~f:(fun merge -> { merge with has_pc_assign }) end - else !S.bincaml_lifter_state.state + else !S.bincaml_lifter_state.diamond in S.bincaml_lifter_state := - { !S.bincaml_lifter_state with active = b.this; state } + { !S.bincaml_lifter_state with active = b.this; diamond } let f_true_branch : branch * branch * branch -> branch = fun (t, f, m) -> t let f_false_branch : branch * branch * branch -> branch = fun (t, f, m) -> f diff --git a/test/transforms/test_aslp.ml b/test/transforms/test_aslp.ml index 27be32f8..e8d6160a 100644 --- a/test/transforms/test_aslp.ml +++ b/test/transforms/test_aslp.ml @@ -76,12 +76,12 @@ let%expect_test "lift: b.eq #1024" = has_pc_assign = false }, "block_2" -> { Aslp_state.assume = None; - stmts = [$BranchTaken:bool := true; $PC:bv64 := 0x400:bv64]; + stmts = [var BranchTaken:bool := true; $PC:bv64 := 0x400:bv64]; succs = ["block_4"]; has_pc_assign = true }, "block_3" -> { Aslp_state.assume = None; stmts = - [($PC:bv64 := bvadd($PC, 0x4:bv32), $BranchTaken:bool := false)]; + [($PC:bv64 := bvadd($PC, 0x4:bv32), var BranchTaken:bool := false)]; succs = ["block_4"]; has_pc_assign = true }, "block_4" -> { Aslp_state.assume = None; stmts = []; succs = []; has_pc_assign = true From 85e154bf0759c65372053ae12adb45d3aaeb636b Mon Sep 17 00:00:00 2001 From: rina Date: Mon, 22 Jun 2026 18:49:06 +1000 Subject: [PATCH 16/28] remove ~exit to avoid double increment --- lib/transforms/aslp/aslp_state.ml | 5 ++- lib/transforms/aslp/bincaml_ibi_make.ml | 5 ++- test/transforms/test_aslp.ml | 43 +++++++++++++++---------- 3 files changed, 32 insertions(+), 21 deletions(-) diff --git a/lib/transforms/aslp/aslp_state.ml b/lib/transforms/aslp/aslp_state.ml index 82cd54f4..c52aa93b 100644 --- a/lib/transforms/aslp/aslp_state.ml +++ b/lib/transforms/aslp/aslp_state.ml @@ -80,7 +80,7 @@ let empty_block () = has_pc_assign = false; } -let empty_aslp_state ~entry ~exit () = +let empty_aslp_state ~entry () = let blocks = StringMap.of_list [ (entry, { (empty_block ()) with succs = [] }) ] in @@ -92,10 +92,9 @@ let empty_aslp_state ~entry ~exit () = value by passing it explicitly. *) let empty_lifter_state ~generator () = let entry = generator.block_id () in - let exit = generator.block_id () in { active = entry; - diamond = empty_aslp_state ~entry ~exit (); + diamond = empty_aslp_state ~entry (); names = Hashtbl.create 16; generator; } diff --git a/lib/transforms/aslp/bincaml_ibi_make.ml b/lib/transforms/aslp/bincaml_ibi_make.ml index 9fcd0f28..f7c33bbe 100644 --- a/lib/transforms/aslp/bincaml_ibi_make.ml +++ b/lib/transforms/aslp/bincaml_ibi_make.ml @@ -65,7 +65,10 @@ struct let generator = !S.bincaml_lifter_state.generator in S.bincaml_lifter_state := Aslp_state.empty_lifter_state ~generator () - let get_ir () = !S.bincaml_lifter_state.diamond + let get_ir () = + let diamond = !S.bincaml_lifter_state.diamond in + Aslp_state.ensure_pc_assigned ~name:diamond.exit diamond + let bigint_of_string : string -> bigint = Z.of_string_base 10 let bigint_of_int : int -> bigint = Z.of_int let bigint_zero : bigint = Z.zero diff --git a/test/transforms/test_aslp.ml b/test/transforms/test_aslp.ml index e8d6160a..0a6185c3 100644 --- a/test/transforms/test_aslp.ml +++ b/test/transforms/test_aslp.ml @@ -12,8 +12,10 @@ let%expect_test "lift empty" = [%expect {| { Aslp_state.blocks = "block_0" - -> { Aslp_state.assume = None; stmts = []; succs = []; - has_pc_assign = false }; + -> { Aslp_state.assume = None; + stmts = + [($PC:bv64 := bvadd($PC, 0x4:bv32), var BranchTaken:bool := false)]; + succs = []; has_pc_assign = true }; entry = "block_0"; exit = "block_0" } |}] @@ -33,8 +35,9 @@ let%expect_test "lift: add x1, x2, x3, lsl #4" = -> { Aslp_state.assume = None; stmts = [var var_0:bv64 := $R2; var var_1:bv64 := $R3; - $R1:bv64 := bvadd(var_0:bv64, bvshl(var_1:bv64, 0x4:bv12))]; - succs = []; has_pc_assign = false }; + $R1:bv64 := bvadd(var_0:bv64, bvshl(var_1:bv64, 0x4:bv12)); + ($PC:bv64 := bvadd($PC, 0x4:bv32), var BranchTaken:bool := false)]; + succs = []; has_pc_assign = true }; entry = "block_0"; exit = "block_0" } |}] @@ -51,12 +54,18 @@ let%expect_test "lift 2x: mov x1, #0xabcd" = [%expect {| { Aslp_state.blocks = "block_0" - -> { Aslp_state.assume = None; stmts = [$R1:bv64 := 0xabcd:bv64]; - succs = ["block_2"]; has_pc_assign = false }, - "block_2" - -> { Aslp_state.assume = None; stmts = [$R1:bv64 := 0xabcd:bv64]; - succs = []; has_pc_assign = false }; - entry = "block_0"; exit = "block_2" } + -> { Aslp_state.assume = None; + stmts = + [$R1:bv64 := 0xabcd:bv64; + ($PC:bv64 := bvadd($PC, 0x4:bv32), var BranchTaken:bool := false)]; + succs = ["block_1"]; has_pc_assign = true }, + "block_1" + -> { Aslp_state.assume = None; + stmts = + [$R1:bv64 := 0xabcd:bv64; + ($PC:bv64 := bvadd($PC, 0x4:bv32), var BranchTaken:bool := false)]; + succs = []; has_pc_assign = true }; + entry = "block_0"; exit = "block_1" } |}] let%expect_test "lift: b.eq #1024" = @@ -72,21 +81,21 @@ let%expect_test "lift: b.eq #1024" = [%expect {| { Aslp_state.blocks = "block_0" - -> { Aslp_state.assume = None; stmts = []; succs = ["block_3"; "block_2"]; + -> { Aslp_state.assume = None; stmts = []; succs = ["block_2"; "block_1"]; has_pc_assign = false }, - "block_2" + "block_1" -> { Aslp_state.assume = None; stmts = [var BranchTaken:bool := true; $PC:bv64 := 0x400:bv64]; - succs = ["block_4"]; has_pc_assign = true }, - "block_3" + succs = ["block_3"]; has_pc_assign = true }, + "block_2" -> { Aslp_state.assume = None; stmts = [($PC:bv64 := bvadd($PC, 0x4:bv32), var BranchTaken:bool := false)]; - succs = ["block_4"]; has_pc_assign = true }, - "block_4" + succs = ["block_3"]; has_pc_assign = true }, + "block_3" -> { Aslp_state.assume = None; stmts = []; succs = []; has_pc_assign = true }; - entry = "block_0"; exit = "block_4" } + entry = "block_0"; exit = "block_3" } |}] let%expect_test "aslp integration basic" = From 693dc1ec93ba6e75aa2e3c347f523656ec238c95 Mon Sep 17 00:00:00 2001 From: rina Date: Mon, 22 Jun 2026 19:31:09 +1000 Subject: [PATCH 17/28] use stringset and slightly smarter pc propagation --- lib/transforms/aslp/aslp_state.ml | 49 ++++++++++---- lib/transforms/aslp/bincaml_ibi.ml | 4 +- lib/transforms/aslp/bincaml_ibi_make.ml | 86 +++++++++++-------------- test/transforms/test_aslp.ml | 6 +- 4 files changed, 79 insertions(+), 66 deletions(-) diff --git a/lib/transforms/aslp/aslp_state.ml b/lib/transforms/aslp/aslp_state.ml index c52aa93b..dcaa1dd8 100644 --- a/lib/transforms/aslp/aslp_state.ml +++ b/lib/transforms/aslp/aslp_state.ml @@ -9,14 +9,15 @@ type stmt = (** A statement within the Bincaml AST. This is just a type alias. *) open struct - type stmt_list_for_printing = stmt list [@@deriving show] + type 'a list_for_printing = 'a list [@@deriving show] end type aslp_block = { assume : Expr.BasilExpr.t option; stmts : stmt CCVector.vector; - [@printer Format.map CCVector.to_list pp_stmt_list_for_printing] - succs : string list; + [@printer Format.map CCVector.to_list (pp_list_for_printing pp_stmt)] + succs : StringSet.t; + [@printer Format.map StringSet.to_list (pp_list_for_printing String.pp)] has_pc_assign : bool; (** Whether, upon reaching the end of this block, it is guaranteed that [PC] will have been assigned to on all control-flow paths. *) @@ -76,14 +77,12 @@ let empty_block () = { assume = None; stmts = CCVector.create (); - succs = []; + succs = StringSet.empty; has_pc_assign = false; } let empty_aslp_state ~entry () = - let blocks = - StringMap.of_list [ (entry, { (empty_block ()) with succs = [] }) ] - in + let blocks = StringMap.singleton entry (empty_block ()) in { blocks; entry; exit = entry } (** Constructs a new empty {!lifter_state}. @@ -172,20 +171,46 @@ let ensure_pc_assigned ~name state = ~stmt:(Stmt.Instr_Assign { attrib = Attrib.empty; al }) | block -> block) +(** Ensures that the two given block names agree on their {!has_pc_assign} + property. If [PC] is assigned in only one of the blocks, a default increment + statement will be added to the other block. Otherwise, nothing changes. + + This function should be called before moving to a control-flow join of the + two blocks. + + This is used to maintain the invariant that at every control flow point, the + [PC] variable is either assigned on all paths or assigned on no paths (from + the beginning of the instruction). *) +let ensure_pc_consistency state ~left ~right = + let has_any_pc_assign = + (get_block state ~name:left).has_pc_assign + || (get_block state ~name:right).has_pc_assign + in + if has_any_pc_assign then + state |> ensure_pc_assigned ~name:left |> ensure_pc_assigned ~name:right + else state + (** Adds a new goto edge from [source] to [target]. If [source] was the exit - block, sets {!exit} to be [target]. *) + block, sets {!exit} to be [target]. If [source] {!has_pc_assign}, this is + propagated to [target]. *) let add_goto aslp_state ~source ~target = let exit = if String.equal source aslp_state.exit then target else aslp_state.exit in - modify_block aslp_state ~name:source ~f:(fun b -> - { b with succs = target :: b.succs }) + let has_pc_assign = ref false in + aslp_state + |> modify_block ~name:source ~f:(fun b -> + has_pc_assign := b.has_pc_assign; + { b with succs = StringSet.add target b.succs }) + |> modify_block ~name:target ~f:(fun b -> + if !has_pc_assign then { b with has_pc_assign = !has_pc_assign } else b) |> fun s -> { s with exit } (** Creates a new block with the given name as a successor of the given [pred]. If [pred] was the exit block, sets {!exit} to be the new block. *) -let add_block aslp_state ~pred ~name = - let blocks = aslp_state.blocks |> StringMap.add name (empty_block ()) in +let add_block ?assume aslp_state ~pred ~name = + let new_block = { (empty_block ()) with assume } in + let blocks = aslp_state.blocks |> StringMap.add name new_block in { aslp_state with blocks } |> add_goto ~source:pred ~target:name let append_aslp_states first second = diff --git a/lib/transforms/aslp/bincaml_ibi.ml b/lib/transforms/aslp/bincaml_ibi.ml index 4de64dae..b2660982 100644 --- a/lib/transforms/aslp/bincaml_ibi.ml +++ b/lib/transforms/aslp/bincaml_ibi.ml @@ -5,8 +5,8 @@ include Bincaml_ibi_make (** @inline *) (** Abstract Bincaml IBI signature. Defines the input type as - {!Lang.Common.Bitvec.t} and the output type as {!Aslp_state.aslp_diamond} but - leaving other types opaque. *) + {!Lang.Common.Bitvec.t} and the output type as {!Aslp_state.aslp_diamond} + but leaving other types opaque. *) module type IBI = sig include OfflineASL_pc.Instruction_building_interface.IBI diff --git a/lib/transforms/aslp/bincaml_ibi_make.ml b/lib/transforms/aslp/bincaml_ibi_make.ml index f7c33bbe..a5d55c8e 100644 --- a/lib/transforms/aslp/bincaml_ibi_make.ml +++ b/lib/transforms/aslp/bincaml_ibi_make.ml @@ -13,7 +13,15 @@ struct type expr = Expr.BasilExpr.t type lexpr = Aslp_lexpr.t type stmt = Aslp_state.stmt - type branch = { this : string; t : string; f : string; m : string } + + type branch = { + this : [ `T | `F | `M ]; + prev : string; + t : string; + f : string; + m : string; + } + type ast = Aslp_state.aslp_diamond (** {2 Bincaml-specific utility functions} *) @@ -35,30 +43,6 @@ struct in Aslp_lexpr.Local (id_name, ty) - (** Ensures that the two given block names agree on their - {!Aslp_state.has_pc_assign} property. This function mutates the state, and - this function should be called before moving to a control-flow join of the - two blocks - - This is used to maintain the invariant that at every control flow point, - the [PC] variable is either assigned on all paths or assigned on no paths - (from the beginning of the instruction). *) - let bincaml_ensure_pc_consistency left right = - let state = !S.bincaml_lifter_state.diamond in - let has_any_pc_assign = - (Aslp_state.get_block state ~name:left).has_pc_assign - || (Aslp_state.get_block state ~name:right).has_pc_assign - in - if has_any_pc_assign then begin - let diamond = - state - |> Aslp_state.ensure_pc_assigned ~name:left - |> Aslp_state.ensure_pc_assigned ~name:right - in - S.bincaml_lifter_state := { !S.bincaml_lifter_state with diamond } - end; - has_any_pc_assign - (** {2 Instruction building interface implementation} *) let reset_ir () = @@ -216,45 +200,49 @@ struct let f_gen_branch : expr -> branch * branch * branch = fun cond -> - let active = !S.bincaml_lifter_state.active in - let state = !S.bincaml_lifter_state.diamond in - let block_id = !S.bincaml_lifter_state.generator.block_id in + let st = !S.bincaml_lifter_state in + let block_id = st.generator.block_id + and ncond = Expr.BasilExpr.boolnot cond in let t = block_id () and f = block_id () and m = block_id () in - let old_succs = ref [] in + let original_succs = ref StringSet.empty in let diamond = - state - |> Aslp_state.modify_block ~name:active ~f:(fun b -> - old_succs := b.succs; - { b with succs = [] }) - |> Aslp_state.add_block ~pred:active ~name:t - |> Aslp_state.add_block ~pred:active ~name:f + st.diamond + |> Aslp_state.modify_block ~name:st.active ~f:(fun b -> + original_succs := b.succs; + { b with succs = StringSet.empty }) + |> Aslp_state.add_block ~pred:st.active ~name:t ~assume:cond + |> Aslp_state.add_block ~pred:st.active ~name:f ~assume:ncond |> Aslp_state.add_block ~pred:t ~name:m |> Aslp_state.add_goto ~source:f ~target:m |> Aslp_state.modify_block ~name:m ~f:(fun b -> - { b with succs = !old_succs }) + { b with succs = !original_succs }) in - S.bincaml_lifter_state := { !S.bincaml_lifter_state with diamond }; + S.bincaml_lifter_state := { st with diamond }; - let tbranch = { this = t; t; f; m } in - (tbranch, { tbranch with this = f }, { tbranch with this = m }) + let tbranch = { this = `T; t; f; m; prev = st.active } in + (tbranch, { tbranch with this = `F }, { tbranch with this = `M }) let f_switch_context : branch -> unit = fun b -> + let this, preds = + match b.this with + | `T -> (b.t, [ b.prev ]) + | `F -> (b.f, [ b.prev ]) + | `M -> (b.m, [ b.t; b.f ]) + in let diamond = - if String.equal b.this b.m then begin - let has_pc_assign = bincaml_ensure_pc_consistency b.t b.f in - - (* mindful that state access is after bincaml_ensure_pc_consistency *) - !S.bincaml_lifter_state.diamond - |> Aslp_state.modify_block ~name:b.this ~f:(fun merge -> - { merge with has_pc_assign }) - end - else !S.bincaml_lifter_state.diamond + !S.bincaml_lifter_state.diamond + |> (match b.this with + | `M -> Aslp_state.ensure_pc_consistency ~left:b.t ~right:b.f + | _ -> Fun.id) + |> List.fold_right + (fun source -> Aslp_state.add_goto ~source ~target:this) + preds in S.bincaml_lifter_state := - { !S.bincaml_lifter_state with active = b.this; diamond } + { !S.bincaml_lifter_state with active = this; diamond } let f_true_branch : branch * branch * branch -> branch = fun (t, f, m) -> t let f_false_branch : branch * branch * branch -> branch = fun (t, f, m) -> f diff --git a/test/transforms/test_aslp.ml b/test/transforms/test_aslp.ml index 0a6185c3..051ac1c8 100644 --- a/test/transforms/test_aslp.ml +++ b/test/transforms/test_aslp.ml @@ -81,14 +81,14 @@ let%expect_test "lift: b.eq #1024" = [%expect {| { Aslp_state.blocks = "block_0" - -> { Aslp_state.assume = None; stmts = []; succs = ["block_2"; "block_1"]; + -> { Aslp_state.assume = None; stmts = []; succs = ["block_1"; "block_2"]; has_pc_assign = false }, "block_1" - -> { Aslp_state.assume = None; + -> { Aslp_state.assume = (Some eq($PSTATE_Z, 0x1:bv1)); stmts = [var BranchTaken:bool := true; $PC:bv64 := 0x400:bv64]; succs = ["block_3"]; has_pc_assign = true }, "block_2" - -> { Aslp_state.assume = None; + -> { Aslp_state.assume = (Some boolnot(eq($PSTATE_Z, 0x1:bv1))); stmts = [($PC:bv64 := bvadd($PC, 0x4:bv32), var BranchTaken:bool := false)]; succs = ["block_3"]; has_pc_assign = true }, From f5a7ec63933e3f56ffcd2abb22a26c04c5669297 Mon Sep 17 00:00:00 2001 From: rina Date: Mon, 22 Jun 2026 19:37:15 +1000 Subject: [PATCH 18/28] touch --- lib/transforms/aslp/bincaml_ibi_make.ml | 26 ++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/lib/transforms/aslp/bincaml_ibi_make.ml b/lib/transforms/aslp/bincaml_ibi_make.ml index a5d55c8e..0093c057 100644 --- a/lib/transforms/aslp/bincaml_ibi_make.ml +++ b/lib/transforms/aslp/bincaml_ibi_make.ml @@ -221,25 +221,25 @@ struct in S.bincaml_lifter_state := { st with diamond }; - let tbranch = { this = `T; t; f; m; prev = st.active } in - (tbranch, { tbranch with this = `F }, { tbranch with this = `M }) + let branch mode = { this = mode; t; f; m; prev = st.active } in + (branch `T, branch `F, branch `M) let f_switch_context : branch -> unit = fun b -> - let this, preds = + let this, preds, fixup_pc = match b.this with - | `T -> (b.t, [ b.prev ]) - | `F -> (b.f, [ b.prev ]) - | `M -> (b.m, [ b.t; b.f ]) + | `T -> (b.t, [ b.prev ], Fun.id) + | `F -> (b.f, [ b.prev ], Fun.id) + | `M -> + ( b.m, + [ b.t; b.f ], + Aslp_state.ensure_pc_consistency ~left:b.t ~right:b.f ) in let diamond = - !S.bincaml_lifter_state.diamond - |> (match b.this with - | `M -> Aslp_state.ensure_pc_consistency ~left:b.t ~right:b.f - | _ -> Fun.id) - |> List.fold_right - (fun source -> Aslp_state.add_goto ~source ~target:this) - preds + !S.bincaml_lifter_state.diamond |> fixup_pc |> fun diamond -> + List.fold_left + (fun diamond source -> Aslp_state.add_goto ~source ~target:this diamond) + diamond preds in S.bincaml_lifter_state := { !S.bincaml_lifter_state with active = this; diamond } From 700ada1925e726754e80dab5b5e83ce5147d18b5 Mon Sep 17 00:00:00 2001 From: rina Date: Mon, 22 Jun 2026 19:48:20 +1000 Subject: [PATCH 19/28] touch --- lib/transforms/aslp/aslp_lexpr.ml | 5 +- lib/transforms/aslp/aslp_state.ml | 83 ++++++++++++------------- lib/transforms/aslp/bincaml_ibi_make.ml | 9 +-- 3 files changed, 47 insertions(+), 50 deletions(-) diff --git a/lib/transforms/aslp/aslp_lexpr.ml b/lib/transforms/aslp/aslp_lexpr.ml index 301553aa..876d299f 100644 --- a/lib/transforms/aslp/aslp_lexpr.ml +++ b/lib/transforms/aslp/aslp_lexpr.ml @@ -1,7 +1,7 @@ open Lang open Common -(** A variable used by the ASLp lifter. Can be converted to a Bincaml {!Var.t} +(** A variable used by the ASLp lifter. Can be converted to a Bincaml {!Var.t}. *) type t = | Local of string * Types.t @@ -68,7 +68,6 @@ let scope = function | _ -> GlobalVar let to_var x = - let ty = typ x and name = name x in - let scope = scope x in + let ty = typ x and name = name x and scope = scope x in let name = match scope with GlobalVar -> "$" ^ name | _ -> name in Var.create ~scope name ty diff --git a/lib/transforms/aslp/aslp_state.ml b/lib/transforms/aslp/aslp_state.ml index dcaa1dd8..1009aa1d 100644 --- a/lib/transforms/aslp/aslp_state.ml +++ b/lib/transforms/aslp/aslp_state.ml @@ -74,12 +74,8 @@ type lifter_state = { (** {1 Utility functions} *) let empty_block () = - { - assume = None; - stmts = CCVector.create (); - succs = StringSet.empty; - has_pc_assign = false; - } + let stmts = CCVector.create () and succs = StringSet.empty in + { assume = None; stmts; succs; has_pc_assign = false } let empty_aslp_state ~entry () = let blocks = StringMap.singleton entry (empty_block ()) in @@ -153,43 +149,6 @@ let add_stmt_to_active stmt (lifter_state : lifter_state) = in { lifter_state with diamond } -(** Ensures that the given block ID has a PC assignment on all paths. If it - already {!has_pc_assign}, no changes are made. *) -let ensure_pc_assigned ~name state = - state - |> modify_block ~name ~f:(function - | { has_pc_assign = false } as block -> - let pc = Aslp_lexpr.to_var PC - and branchtaken = Aslp_lexpr.to_var BranchTaken in - let incremented = - Expr.BasilExpr.( - applyintrin ~op:`BVADD [ rvar pc; bv_of_int ~size:32 4 ]) - and boolfalse = Expr.BasilExpr.boolconst false in - let al = [ (pc, incremented); (branchtaken, boolfalse) ] in - block - |> add_stmt_to_block - ~stmt:(Stmt.Instr_Assign { attrib = Attrib.empty; al }) - | block -> block) - -(** Ensures that the two given block names agree on their {!has_pc_assign} - property. If [PC] is assigned in only one of the blocks, a default increment - statement will be added to the other block. Otherwise, nothing changes. - - This function should be called before moving to a control-flow join of the - two blocks. - - This is used to maintain the invariant that at every control flow point, the - [PC] variable is either assigned on all paths or assigned on no paths (from - the beginning of the instruction). *) -let ensure_pc_consistency state ~left ~right = - let has_any_pc_assign = - (get_block state ~name:left).has_pc_assign - || (get_block state ~name:right).has_pc_assign - in - if has_any_pc_assign then - state |> ensure_pc_assigned ~name:left |> ensure_pc_assigned ~name:right - else state - (** Adds a new goto edge from [source] to [target]. If [source] was the exit block, sets {!exit} to be [target]. If [source] {!has_pc_assign}, this is propagated to [target]. *) @@ -213,6 +172,8 @@ let add_block ?assume aslp_state ~pred ~name = let blocks = aslp_state.blocks |> StringMap.add name new_block in { aslp_state with blocks } |> add_goto ~source:pred ~target:name +(** {1 Advanced functions} *) + let append_aslp_states first second = let f key = function | `Both _ -> failwith "overlapping aslp_state block names" @@ -221,6 +182,42 @@ let append_aslp_states first second = let blocks = StringMap.merge_safe ~f first.blocks second.blocks in { first with blocks } |> add_goto ~source:first.exit ~target:second.entry +(** Ensures that the given block ID has a PC assignment on all paths. If it + already {!has_pc_assign}, no changes are made. *) +let ensure_pc_assigned ~name = + modify_block ~name ~f:(function + | { has_pc_assign = false } as block -> + let pc = Aslp_lexpr.to_var PC + and branchtaken = Aslp_lexpr.to_var BranchTaken in + let incremented = + Expr.BasilExpr.( + applyintrin ~op:`BVADD [ rvar pc; bv_of_int ~size:32 4 ]) + and boolfalse = Expr.BasilExpr.boolconst false in + let al = [ (pc, incremented); (branchtaken, boolfalse) ] in + block + |> add_stmt_to_block + ~stmt:(Stmt.Instr_Assign { attrib = Attrib.empty; al }) + | block -> block) + +(** Ensures that the two given block names agree on their {!has_pc_assign} + property. If [PC] is assigned in only one of the blocks, a default increment + statement will be added to the other block. Otherwise, nothing changes. + + This function should be called before moving to a control-flow join of the + two blocks. + + This is used to maintain the invariant that at every control flow point, the + [PC] variable is either assigned on all paths or assigned on no paths (from + the beginning of the instruction). *) +let ensure_pc_consistency state ~left ~right = + match + ( (get_block state ~name:left).has_pc_assign, + (get_block state ~name:right).has_pc_assign ) + with + | true, false -> state |> ensure_pc_assigned ~name:right + | false, true -> state |> ensure_pc_assigned ~name:left + | _ -> state + (** {1 Formatters} *) let show_aslp_block = show_aslp_block diff --git a/lib/transforms/aslp/bincaml_ibi_make.ml b/lib/transforms/aslp/bincaml_ibi_make.ml index 0093c057..247e6dc5 100644 --- a/lib/transforms/aslp/bincaml_ibi_make.ml +++ b/lib/transforms/aslp/bincaml_ibi_make.ml @@ -236,10 +236,11 @@ struct Aslp_state.ensure_pc_consistency ~left:b.t ~right:b.f ) in let diamond = - !S.bincaml_lifter_state.diamond |> fixup_pc |> fun diamond -> - List.fold_left - (fun diamond source -> Aslp_state.add_goto ~source ~target:this diamond) - diamond preds + !S.bincaml_lifter_state.diamond |> fixup_pc + |> Fun.flip + (List.fold_left (fun diamond source -> + Aslp_state.add_goto ~source ~target:this diamond)) + preds in S.bincaml_lifter_state := { !S.bincaml_lifter_state with active = this; diamond } From 45bc560ea903d458953e23062a29cf3a38703a3a Mon Sep 17 00:00:00 2001 From: rina Date: Mon, 22 Jun 2026 19:53:34 +1000 Subject: [PATCH 20/28] x --- lib/transforms/aslp/aslp_state.ml | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/lib/transforms/aslp/aslp_state.ml b/lib/transforms/aslp/aslp_state.ml index 1009aa1d..6be5f648 100644 --- a/lib/transforms/aslp/aslp_state.ml +++ b/lib/transforms/aslp/aslp_state.ml @@ -3,9 +3,13 @@ open Common (** {1 Types} *) -type stmt = - ((Var.t, Var.t, Expr.BasilExpr.t) Stmt.t[@printer Stmt.pp_stmt_basil]) -[@@deriving show] +open struct + type stmt = + ((Var.t, Var.t, Expr.BasilExpr.t) Stmt.t[@printer Stmt.pp_stmt_basil]) + [@@deriving show] +end + +type nonrec stmt = stmt (** A statement within the Bincaml AST. This is just a type alias. *) open struct @@ -172,8 +176,8 @@ let add_block ?assume aslp_state ~pred ~name = let blocks = aslp_state.blocks |> StringMap.add name new_block in { aslp_state with blocks } |> add_goto ~source:pred ~target:name -(** {1 Advanced functions} *) - +(** Sequentially joins the given {!aslp_diamond}s such that [first] is succeeded + by [second]. *) let append_aslp_states first second = let f key = function | `Both _ -> failwith "overlapping aslp_state block names" @@ -182,6 +186,8 @@ let append_aslp_states first second = let blocks = StringMap.merge_safe ~f first.blocks second.blocks in { first with blocks } |> add_goto ~source:first.exit ~target:second.entry +(** {1 Program counter functions} *) + (** Ensures that the given block ID has a PC assignment on all paths. If it already {!has_pc_assign}, no changes are made. *) let ensure_pc_assigned ~name = From 15d8f33f4eb6575fec364b93b132f0f416781bc6 Mon Sep 17 00:00:00 2001 From: rina Date: Tue, 23 Jun 2026 13:49:10 +1000 Subject: [PATCH 21/28] match on `BranchTaken = true` to detect has_pc_assign --- lib/transforms/aslp/aslp_state.ml | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/lib/transforms/aslp/aslp_state.ml b/lib/transforms/aslp/aslp_state.ml index 6be5f648..c2393cf1 100644 --- a/lib/transforms/aslp/aslp_state.ml +++ b/lib/transforms/aslp/aslp_state.ml @@ -138,11 +138,16 @@ let modify_block aslp_state ~name ~f = let add_stmt_to_block blk ~stmt = let has_pc_assign = match stmt with - | Stmt.Instr_Assign _ -> - Stmt.iter_assigned stmt |> Iter.mem ~eq:Var.equal Aslp_lexpr.(to_var PC) + | Stmt.Instr_Assign { al = assigns; _ } -> + let branchtaken = Aslp_lexpr.(to_var BranchTaken) in + assigns + |> List.Assoc.map_values Expr.BasilExpr.unfix + |> List.exists (function + | l, Abstract_expr.AbstractExpr.Constant { const = `Bool true; _ } -> + Var.equal l branchtaken + | _ -> false) | _ -> false in - CCVector.push blk.stmts stmt; if has_pc_assign then { blk with has_pc_assign } else blk From c219370dfae499b4cbd26b7ea0614c92c302c102 Mon Sep 17 00:00:00 2001 From: rina Date: Tue, 23 Jun 2026 13:57:58 +1000 Subject: [PATCH 22/28] avoid `ref` --- lib/transforms/aslp/aslp_state.ml | 8 +++++--- lib/transforms/aslp/bincaml_ibi_make.ml | 8 ++++---- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/lib/transforms/aslp/aslp_state.ml b/lib/transforms/aslp/aslp_state.ml index c2393cf1..7c54d905 100644 --- a/lib/transforms/aslp/aslp_state.ml +++ b/lib/transforms/aslp/aslp_state.ml @@ -165,13 +165,15 @@ let add_goto aslp_state ~source ~target = let exit = if String.equal source aslp_state.exit then target else aslp_state.exit in - let has_pc_assign = ref false in + let src_has_pc_assign = + aslp_state |> get_block ~name:source |> fun x -> x.has_pc_assign + in aslp_state |> modify_block ~name:source ~f:(fun b -> - has_pc_assign := b.has_pc_assign; { b with succs = StringSet.add target b.succs }) |> modify_block ~name:target ~f:(fun b -> - if !has_pc_assign then { b with has_pc_assign = !has_pc_assign } else b) + if src_has_pc_assign then { b with has_pc_assign = src_has_pc_assign } + else b) |> fun s -> { s with exit } (** Creates a new block with the given name as a successor of the given [pred]. diff --git a/lib/transforms/aslp/bincaml_ibi_make.ml b/lib/transforms/aslp/bincaml_ibi_make.ml index 247e6dc5..a93156f0 100644 --- a/lib/transforms/aslp/bincaml_ibi_make.ml +++ b/lib/transforms/aslp/bincaml_ibi_make.ml @@ -205,19 +205,19 @@ struct and ncond = Expr.BasilExpr.boolnot cond in let t = block_id () and f = block_id () and m = block_id () in - - let original_succs = ref StringSet.empty in + let original_succs = + st.diamond |> Aslp_state.get_block ~name:st.active |> fun x -> x.succs + in let diamond = st.diamond |> Aslp_state.modify_block ~name:st.active ~f:(fun b -> - original_succs := b.succs; { b with succs = StringSet.empty }) |> Aslp_state.add_block ~pred:st.active ~name:t ~assume:cond |> Aslp_state.add_block ~pred:st.active ~name:f ~assume:ncond |> Aslp_state.add_block ~pred:t ~name:m |> Aslp_state.add_goto ~source:f ~target:m |> Aslp_state.modify_block ~name:m ~f:(fun b -> - { b with succs = !original_succs }) + { b with succs = original_succs }) in S.bincaml_lifter_state := { st with diamond }; From dbe34b42cba67232ea26a794023888678aef9236 Mon Sep 17 00:00:00 2001 From: rina Date: Tue, 23 Jun 2026 14:21:26 +1000 Subject: [PATCH 23/28] simplify the branch types --- lib/transforms/aslp/aslp_state.ml | 42 +++++++++++++++--------- lib/transforms/aslp/bincaml_ibi_make.ml | 43 +++++++++---------------- 2 files changed, 43 insertions(+), 42 deletions(-) diff --git a/lib/transforms/aslp/aslp_state.ml b/lib/transforms/aslp/aslp_state.ml index 7c54d905..19ebebfa 100644 --- a/lib/transforms/aslp/aslp_state.ml +++ b/lib/transforms/aslp/aslp_state.ml @@ -195,8 +195,8 @@ let append_aslp_states first second = (** {1 Program counter functions} *) -(** Ensures that the given block ID has a PC assignment on all paths. If it - already {!has_pc_assign}, no changes are made. *) +(** Ensures that the given block ID has a PC assignment. If it already + {!has_pc_assign}, no changes are made. *) let ensure_pc_assigned ~name = modify_block ~name ~f:(function | { has_pc_assign = false } as block -> @@ -212,24 +212,36 @@ let ensure_pc_assigned ~name = ~stmt:(Stmt.Instr_Assign { attrib = Attrib.empty; al }) | block -> block) -(** Ensures that the two given block names agree on their {!has_pc_assign} +(** Ensures that the left and right blocks agree on their {!has_pc_assign} property. If [PC] is assigned in only one of the blocks, a default increment - statement will be added to the other block. Otherwise, nothing changes. - - This function should be called before moving to a control-flow join of the - two blocks. + statement will be added to the other block and {!has_pc_assign} will be + propagated to [join]. Otherwise, nothing changes. + + This function should be called with blocks in this structure: + {v + left right + \ / + join + v} + It should be called after [left] and [right] have been populated with + statements, before moving to [join]. This is used to maintain the invariant that at every control flow point, the [PC] variable is either assigned on all paths or assigned on no paths (from the beginning of the instruction). *) -let ensure_pc_consistency state ~left ~right = - match - ( (get_block state ~name:left).has_pc_assign, - (get_block state ~name:right).has_pc_assign ) - with - | true, false -> state |> ensure_pc_assigned ~name:right - | false, true -> state |> ensure_pc_assigned ~name:left - | _ -> state +let ensure_pc_consistency state ~left ~right ~join = + let has_pc_assign, state = + match + ( (get_block state ~name:left).has_pc_assign, + (get_block state ~name:right).has_pc_assign ) + with + | true, false -> (true, state |> ensure_pc_assigned ~name:right) + | false, true -> (true, state |> ensure_pc_assigned ~name:left) + | _ -> (false, state) + in + if has_pc_assign then + state |> modify_block ~name:join ~f:(fun b -> { b with has_pc_assign }) + else state (** {1 Formatters} *) diff --git a/lib/transforms/aslp/bincaml_ibi_make.ml b/lib/transforms/aslp/bincaml_ibi_make.ml index a93156f0..275331b9 100644 --- a/lib/transforms/aslp/bincaml_ibi_make.ml +++ b/lib/transforms/aslp/bincaml_ibi_make.ml @@ -14,13 +14,12 @@ struct type lexpr = Aslp_lexpr.t type stmt = Aslp_state.stmt - type branch = { - this : [ `T | `F | `M ]; - prev : string; - t : string; - f : string; - m : string; - } + type branch = + [ `T of string + | `F of string + | `M of string * (string * string) + (** A merge branch also records its true and false predecessors to + propagate PC information. *) ] type ast = Aslp_state.aslp_diamond @@ -220,30 +219,20 @@ struct { b with succs = original_succs }) in S.bincaml_lifter_state := { st with diamond }; - - let branch mode = { this = mode; t; f; m; prev = st.active } in - (branch `T, branch `F, branch `M) + (`T t, `F f, `M (m, (t, f))) let f_switch_context : branch -> unit = fun b -> - let this, preds, fixup_pc = - match b.this with - | `T -> (b.t, [ b.prev ], Fun.id) - | `F -> (b.f, [ b.prev ], Fun.id) - | `M -> - ( b.m, - [ b.t; b.f ], - Aslp_state.ensure_pc_consistency ~left:b.t ~right:b.f ) - in - let diamond = - !S.bincaml_lifter_state.diamond |> fixup_pc - |> Fun.flip - (List.fold_left (fun diamond source -> - Aslp_state.add_goto ~source ~target:this diamond)) - preds + let diamond = !S.bincaml_lifter_state.diamond in + let active, diamond = + match b with + | `T x | `F x -> (x, diamond) + | `M (m, (t, f)) -> + ( m, + diamond |> Aslp_state.ensure_pc_consistency ~left:t ~right:f ~join:m + ) in - S.bincaml_lifter_state := - { !S.bincaml_lifter_state with active = this; diamond } + S.bincaml_lifter_state := { !S.bincaml_lifter_state with active; diamond } let f_true_branch : branch * branch * branch -> branch = fun (t, f, m) -> t let f_false_branch : branch * branch * branch -> branch = fun (t, f, m) -> f From af7fd51783245c3d91ba7348d62f098a85301270 Mon Sep 17 00:00:00 2001 From: rina Date: Tue, 23 Jun 2026 14:33:50 +1000 Subject: [PATCH 24/28] match on any assigns to branchtaken --- lib/transforms/aslp/aslp_state.ml | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/lib/transforms/aslp/aslp_state.ml b/lib/transforms/aslp/aslp_state.ml index 19ebebfa..76ac8389 100644 --- a/lib/transforms/aslp/aslp_state.ml +++ b/lib/transforms/aslp/aslp_state.ml @@ -139,13 +139,8 @@ let add_stmt_to_block blk ~stmt = let has_pc_assign = match stmt with | Stmt.Instr_Assign { al = assigns; _ } -> - let branchtaken = Aslp_lexpr.(to_var BranchTaken) in - assigns - |> List.Assoc.map_values Expr.BasilExpr.unfix - |> List.exists (function - | l, Abstract_expr.AbstractExpr.Constant { const = `Bool true; _ } -> - Var.equal l branchtaken - | _ -> false) + assigns |> List.map fst + |> List.mem ~eq:Var.equal Aslp_lexpr.(to_var BranchTaken) | _ -> false in CCVector.push blk.stmts stmt; From 9b0393a133a79dc740c3694cd7f11dd970af097f Mon Sep 17 00:00:00 2001 From: rina Date: Tue, 23 Jun 2026 14:39:32 +1000 Subject: [PATCH 25/28] " " PC --- lib/transforms/aslp/aslp_state.ml | 5 ++--- test/transforms/test_aslp.ml | 10 +++++----- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/lib/transforms/aslp/aslp_state.ml b/lib/transforms/aslp/aslp_state.ml index 76ac8389..f2b37d6c 100644 --- a/lib/transforms/aslp/aslp_state.ml +++ b/lib/transforms/aslp/aslp_state.ml @@ -139,8 +139,7 @@ let add_stmt_to_block blk ~stmt = let has_pc_assign = match stmt with | Stmt.Instr_Assign { al = assigns; _ } -> - assigns |> List.map fst - |> List.mem ~eq:Var.equal Aslp_lexpr.(to_var BranchTaken) + assigns |> List.map fst |> List.mem ~eq:Var.equal Aslp_lexpr.(to_var PC) | _ -> false in CCVector.push blk.stmts stmt; @@ -201,7 +200,7 @@ let ensure_pc_assigned ~name = Expr.BasilExpr.( applyintrin ~op:`BVADD [ rvar pc; bv_of_int ~size:32 4 ]) and boolfalse = Expr.BasilExpr.boolconst false in - let al = [ (pc, incremented); (branchtaken, boolfalse) ] in + let al = [ (branchtaken, boolfalse); (pc, incremented) ] in block |> add_stmt_to_block ~stmt:(Stmt.Instr_Assign { attrib = Attrib.empty; al }) diff --git a/test/transforms/test_aslp.ml b/test/transforms/test_aslp.ml index 051ac1c8..459f35a4 100644 --- a/test/transforms/test_aslp.ml +++ b/test/transforms/test_aslp.ml @@ -14,7 +14,7 @@ let%expect_test "lift empty" = { Aslp_state.blocks = "block_0" -> { Aslp_state.assume = None; stmts = - [($PC:bv64 := bvadd($PC, 0x4:bv32), var BranchTaken:bool := false)]; + [(var BranchTaken:bool := false, $PC:bv64 := bvadd($PC, 0x4:bv32))]; succs = []; has_pc_assign = true }; entry = "block_0"; exit = "block_0" } |}] @@ -36,7 +36,7 @@ let%expect_test "lift: add x1, x2, x3, lsl #4" = stmts = [var var_0:bv64 := $R2; var var_1:bv64 := $R3; $R1:bv64 := bvadd(var_0:bv64, bvshl(var_1:bv64, 0x4:bv12)); - ($PC:bv64 := bvadd($PC, 0x4:bv32), var BranchTaken:bool := false)]; + (var BranchTaken:bool := false, $PC:bv64 := bvadd($PC, 0x4:bv32))]; succs = []; has_pc_assign = true }; entry = "block_0"; exit = "block_0" } |}] @@ -57,13 +57,13 @@ let%expect_test "lift 2x: mov x1, #0xabcd" = -> { Aslp_state.assume = None; stmts = [$R1:bv64 := 0xabcd:bv64; - ($PC:bv64 := bvadd($PC, 0x4:bv32), var BranchTaken:bool := false)]; + (var BranchTaken:bool := false, $PC:bv64 := bvadd($PC, 0x4:bv32))]; succs = ["block_1"]; has_pc_assign = true }, "block_1" -> { Aslp_state.assume = None; stmts = [$R1:bv64 := 0xabcd:bv64; - ($PC:bv64 := bvadd($PC, 0x4:bv32), var BranchTaken:bool := false)]; + (var BranchTaken:bool := false, $PC:bv64 := bvadd($PC, 0x4:bv32))]; succs = []; has_pc_assign = true }; entry = "block_0"; exit = "block_1" } |}] @@ -90,7 +90,7 @@ let%expect_test "lift: b.eq #1024" = "block_2" -> { Aslp_state.assume = (Some boolnot(eq($PSTATE_Z, 0x1:bv1))); stmts = - [($PC:bv64 := bvadd($PC, 0x4:bv32), var BranchTaken:bool := false)]; + [(var BranchTaken:bool := false, $PC:bv64 := bvadd($PC, 0x4:bv32))]; succs = ["block_3"]; has_pc_assign = true }, "block_3" -> { Aslp_state.assume = None; stmts = []; succs = []; has_pc_assign = true From 84cd59090d3348147cde228d62139fe4c6005844 Mon Sep 17 00:00:00 2001 From: rina Date: Tue, 23 Jun 2026 16:02:11 +1000 Subject: [PATCH 26/28] has_pc_assign assertion --- lib/transforms/aslp/aslp_state.ml | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/lib/transforms/aslp/aslp_state.ml b/lib/transforms/aslp/aslp_state.ml index f2b37d6c..2a33f0b3 100644 --- a/lib/transforms/aslp/aslp_state.ml +++ b/lib/transforms/aslp/aslp_state.ml @@ -142,8 +142,15 @@ let add_stmt_to_block blk ~stmt = assigns |> List.map fst |> List.mem ~eq:Var.equal Aslp_lexpr.(to_var PC) | _ -> false in - CCVector.push blk.stmts stmt; - if has_pc_assign then { blk with has_pc_assign } else blk + match (has_pc_assign, blk.has_pc_assign) with + | true, true -> + failwith + "add_stmt_to_block: attempt to add PC assignment but has_pc_assign is \ + already set" + | true, false -> + CCVector.push blk.stmts stmt; + { blk with has_pc_assign } + | false, _ -> blk let add_stmt_to_active stmt (lifter_state : lifter_state) = let diamond = From 12103ae8416909e8e96ef884af64ded4820f1db3 Mon Sep 17 00:00:00 2001 From: rina Date: Tue, 23 Jun 2026 16:16:45 +1000 Subject: [PATCH 27/28] fix silly bug and add double PC assumption --- lib/transforms/aslp/aslp_state.ml | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/lib/transforms/aslp/aslp_state.ml b/lib/transforms/aslp/aslp_state.ml index 2a33f0b3..0ab349ef 100644 --- a/lib/transforms/aslp/aslp_state.ml +++ b/lib/transforms/aslp/aslp_state.ml @@ -135,6 +135,12 @@ let modify_block aslp_state ~name ~f = in { aslp_state with blocks } +(** Appends the given statement to the given block. + + Sets {!has_pc_assign} if the statement is an assignment to {!Aslp_lexpr.PC}. + It is assumed that [PC] is assigned at most once on any straight-line path. + Raises an exception if the statement is an assignment to [PC] and + {!has_pc_assign} is already set. *) let add_stmt_to_block blk ~stmt = let has_pc_assign = match stmt with @@ -150,7 +156,9 @@ let add_stmt_to_block blk ~stmt = | true, false -> CCVector.push blk.stmts stmt; { blk with has_pc_assign } - | false, _ -> blk + | false, _ -> + CCVector.push blk.stmts stmt; + blk let add_stmt_to_active stmt (lifter_state : lifter_state) = let diamond = From e4fbb19971d4896214dd7e3241c4946654760cc9 Mon Sep 17 00:00:00 2001 From: rina Date: Tue, 23 Jun 2026 16:33:14 +1000 Subject: [PATCH 28/28] %> --- lib/transforms/aslp/aslp_state.ml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/transforms/aslp/aslp_state.ml b/lib/transforms/aslp/aslp_state.ml index 0ab349ef..b1bee8a1 100644 --- a/lib/transforms/aslp/aslp_state.ml +++ b/lib/transforms/aslp/aslp_state.ml @@ -103,8 +103,8 @@ let empty_lifter_state ~generator () = Be careful! You should use {!aslp_ids_from_generators} if you will use the lifted statements within an existing Bincaml IR. *) let empty_aslp_ids () = - let block_id = Printf.sprintf "block_%d" % Fix.Gensym.make () - and local_id = Printf.sprintf "var_%d" % Fix.Gensym.make () in + let block_id = Fix.Gensym.make () %> Printf.sprintf "block_%d" + and local_id = Fix.Gensym.make () %> Printf.sprintf "var_%d" in { block_id; local_id } (** {2 ID-generating functions} *) @@ -115,8 +115,8 @@ let empty_aslp_ids () = This will ensure that ASLp's local variable and block names do not clash with existing names. *) let aslp_ids_from_generators ~block_ids ~local_ids = - let block_id = ID.name % ID.fresh ~name:"block" block_ids - and local_id = ID.name % ID.fresh ~name:"var" local_ids in + let block_id = ID.fresh ~name:"block" block_ids %> ID.name + and local_id = ID.fresh ~name:"var" local_ids %> ID.name in { block_id; local_id } (** {1 State manipulation functions} *)