From bdf06c71433ea2155c0db78d309a0790bc83411c Mon Sep 17 00:00:00 2001
From: belhasse
Date: Sat, 21 Mar 2026 13:27:12 +0100
Subject: [PATCH 1/2] escape data strings according to WebAssembly spec , Fixes #37

---
Review note (not part of the commit message): bytes outside printable ASCII
are now printed as \hh instead of \u{hh}. \u{...} names a code point that is
UTF-8 encoded on parsing, so "\ff" came back as the two bytes c3 bf after a
round-trip. The expected test output is updated to match. The hunks were
re-wrapped from a flattened copy: context whitespace is reconstructed and the
blob ids on the index lines are stale, so regenerate the series with
git format-patch after applying.

 src/ir/text.ml                  | 22 +++++++++++++++++++++-
 test/fmt/data_bytes.wat         |  4 ++++
 test/fmt/data_roundtrip.t       |  8 ++++++++
 test/fmt/data_special_chars.wat |  4 ++++
 test/fmt/dune                   |  2 ++
 test/fmt/print.t                | 12 ++++++++++++
 6 files changed, 51 insertions(+), 1 deletion(-)
 create mode 100644 test/fmt/data_bytes.wat
 create mode 100644 test/fmt/data_roundtrip.t
 create mode 100644 test/fmt/data_special_chars.wat

diff --git a/src/ir/text.ml b/src/ir/text.ml
index 2af9b75e0..67e8ffb10 100644
--- a/src/ir/text.ml
+++ b/src/ir/text.ml
@@ -16,6 +16,26 @@ type indice =
 
 let pp_id fmt id = pf fmt "$%s" id
 
+(* Print the bytes of [s] as the contents of a WebAssembly text-format string.
+   Anything outside printable ASCII is written as a two-digit [\hh] byte
+   escape: [\u{...}] denotes a code point and would re-encode bytes >= 0x80. *)
+let pp_name_inner fmt s =
+  let pp_hex_char fmt c = pf fmt "\\%02x" (Char.code c) in
+  let pp_char fmt = function
+    | '\n' -> string fmt "\\n"
+    | '\r' -> string fmt "\\r"
+    | '\t' -> string fmt "\\t"
+    | '\'' -> string fmt "\\'"
+    | '"' -> string fmt "\\\""
+    | '\\' -> string fmt "\\\\"
+    | c ->
+      let ci = Char.code c in
+      if 0x20 <= ci && ci < 0x7f then char fmt c else pp_hex_char fmt c
+  in
+  String.iter (pp_char fmt) s
+
+let pp_name fmt s = pf fmt {|"%a"|} pp_name_inner s
+
 let pp_id_opt fmt = function None -> () | Some i -> pf fmt " %a" pp_id i
 
 let pp_indice fmt = function Raw u -> int fmt u | Text i -> pp_id fmt i
@@ -761,7 +781,7 @@ module Data = struct
   }
 
   let pp fmt (d : t) =
-    pf fmt {|(data%a %a %S)|} pp_id_opt d.id Mode.pp d.mode d.init
+    pf fmt {|(data%a %a %a)|} pp_id_opt d.id Mode.pp d.mode pp_name d.init
 end
 
 module Tag = struct
diff --git a/test/fmt/data_bytes.wat b/test/fmt/data_bytes.wat
new file mode 100644
index 000000000..0b61a1d7b
--- /dev/null
+++ b/test/fmt/data_bytes.wat
@@ -0,0 +1,4 @@
+(module
+  (memory 1)
+  (data (i32.const 0) "Hello\00World\01\02\03\ff")
+)
diff --git a/test/fmt/data_roundtrip.t b/test/fmt/data_roundtrip.t
new file mode 100644
index 000000000..4dda1361a
--- /dev/null
+++ b/test/fmt/data_roundtrip.t
@@ -0,0 +1,8 @@
+test data special chars round-trip:
+  $ owi fmt data_special_chars.wat > /tmp/owi_test_output.wat
+  $ owi fmt /tmp/owi_test_output.wat
+  (module
+    (memory 1)
+    (data (memory 0) (offset i32.const 0) "hello\n\t\r\"\'\\world")
+  )
+  $ rm /tmp/owi_test_output.wat
diff --git a/test/fmt/data_special_chars.wat b/test/fmt/data_special_chars.wat
new file mode 100644
index 000000000..0a0f8d4cc
--- /dev/null
+++ b/test/fmt/data_special_chars.wat
@@ -0,0 +1,4 @@
+(module
+  (memory 1)
+  (data (i32.const 0) "hello\n\t\r\"'\\world")
+)
diff --git a/test/fmt/dune b/test/fmt/dune
index 8c5d72d10..fe75cf4cf 100644
--- a/test/fmt/dune
+++ b/test/fmt/dune
@@ -11,5 +11,7 @@
   m.wat
   locals.wat
   locals_drop.wat
+  data_special_chars.wat
+  data_bytes.wat
   script.wast
   script.t))
diff --git a/test/fmt/print.t b/test/fmt/print.t
index c7c599387..42980de20 100644
--- a/test/fmt/print.t
+++ b/test/fmt/print.t
@@ -32,3 +32,15 @@ print simplified:
     )
     (start 1)
   )
+print data with special chars:
+  $ owi fmt data_special_chars.wat
+  (module
+    (memory 1)
+    (data (memory 0) (offset i32.const 0) "hello\n\t\r\"\'\\world")
+  )
+print data with raw bytes:
+  $ owi fmt data_bytes.wat
+  (module
+    (memory 1)
+    (data (memory 0) (offset i32.const 0) "Hello\00World\01\02\03\ff")
+  )

From 5934e688b0dce1e14b3d097390e00b91a610ce7e Mon Sep 17 00:00:00 2001
From: belhasse
Date: Tue, 24 Mar 2026 14:10:58 +0100
Subject: [PATCH 2/2] Address review comments for data string formatting

---
 src/ir/text.ml            | 5 ++---
 test/fmt/data_roundtrip.t | 6 +++---
 2 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/src/ir/text.ml b/src/ir/text.ml
index 67e8ffb10..a2be1bd98 100644
--- a/src/ir/text.ml
+++ b/src/ir/text.ml
@@ -28,9 +28,8 @@ let pp_name_inner fmt s =
     | '\'' -> string fmt "\\'"
     | '"' -> string fmt "\\\""
     | '\\' -> string fmt "\\\\"
-    | c ->
-      let ci = Char.code c in
-      if 0x20 <= ci && ci < 0x7f then char fmt c else pp_hex_char fmt c
+    | '\x20' .. '\x7e' as c -> char fmt c
+    | c -> pp_hex_char fmt c
   in
   String.iter (pp_char fmt) s
 
diff --git a/test/fmt/data_roundtrip.t b/test/fmt/data_roundtrip.t
index 4dda1361a..df582ff58 100644
--- a/test/fmt/data_roundtrip.t
+++ b/test/fmt/data_roundtrip.t
@@ -1,8 +1,8 @@
 test data special chars round-trip:
-  $ owi fmt data_special_chars.wat > /tmp/owi_test_output.wat
-  $ owi fmt /tmp/owi_test_output.wat
+  $ owi fmt data_special_chars.wat > ./owi_test_output.wat
+  $ owi fmt ./owi_test_output.wat
   (module
     (memory 1)
     (data (memory 0) (offset i32.const 0) "hello\n\t\r\"\'\\world")
   )
-  $ rm /tmp/owi_test_output.wat
+  $ rm ./owi_test_output.wat