From 2cf3a3a0e2a504cc4d2d8a5f758159e39eb21aca Mon Sep 17 00:00:00 2001 From: Adam Korczynski Date: Fri, 10 Apr 2026 18:35:24 +0100 Subject: [PATCH] Add fuzzer for json_decode module MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fuzzes the CPython _json C module (Modules/_json.c) through JSONDecoder.decode() and JSONDecoder.raw_decode(), dispatched per input via FuzzedDataProvider. Input bytes are decoded as latin-1 so every byte value maps to a distinct code point, preserving the full 0–255 byte space at the parser boundary — in contrast to json.py, which feeds UTF-8 with errors="replace" and collapses any invalid sequence to U+FFFD, sharply shrinking the effective input space. It also reaches raw_decode()'s trailing-data position reporting that json.py never calls, and drops the dumps/loads roundtrip to focus purely on decoder hardening rather than re-encoding already-valid objects. --- Makefile | 5 ++++- fuzz_targets.txt | 1 + json_decode.py | 35 +++++++++++++++++++++++++++++++++++ 3 files changed, 40 insertions(+), 1 deletion(-) create mode 100644 json_decode.py diff --git a/Makefile b/Makefile index 7bbbca4..a5e88ab 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -all : fuzzer-html fuzzer-email fuzzer-httpclient fuzzer-json fuzzer-difflib fuzzer-csv fuzzer-decode fuzzer-ast fuzzer-tarfile fuzzer-tarfile-hypothesis fuzzer-zipfile fuzzer-zipfile-hypothesis fuzzer-re fuzzer-configparser fuzzer-tomllib fuzzer-plistlib fuzzer-xml fuzzer-zoneinfo fuzzer-binascii +all : fuzzer-html fuzzer-email fuzzer-httpclient fuzzer-json fuzzer-difflib fuzzer-csv fuzzer-decode fuzzer-ast fuzzer-tarfile fuzzer-tarfile-hypothesis fuzzer-zipfile fuzzer-zipfile-hypothesis fuzzer-re fuzzer-configparser fuzzer-tomllib fuzzer-plistlib fuzzer-xml fuzzer-zoneinfo fuzzer-binascii fuzzer-json-decode PYTHON_CONFIG_PATH=$(CPYTHON_INSTALL_PATH)/bin/python3-config CXXFLAGS += $(shell $(PYTHON_CONFIG_PATH) --cflags) @@ -43,3 +43,6 @@ 
fuzzer-zoneinfo:
 
 fuzzer-binascii:
 	clang++ $(CXXFLAGS) $(LIB_FUZZING_ENGINE) -std=c++17 fuzzer.cpp -DPYTHON_HARNESS_PATH="\"binascii.py\"" -ldl $(LDFLAGS) -o fuzzer-binascii
+
+fuzzer-json-decode:
+	clang++ $(CXXFLAGS) $(LIB_FUZZING_ENGINE) -std=c++17 fuzzer.cpp -DPYTHON_HARNESS_PATH="\"json_decode.py\"" -ldl $(LDFLAGS) -o fuzzer-json-decode
diff --git a/fuzz_targets.txt b/fuzz_targets.txt
index b016889..c7a836b 100644
--- a/fuzz_targets.txt
+++ b/fuzz_targets.txt
@@ -8,6 +8,7 @@ email email.py
 html html.py
 httpclient httpclient.py
 json json.py
+json-decode json_decode.py
 plistlib plist.py
 re re.py
 tarfile tarfile.py
diff --git a/json_decode.py b/json_decode.py
new file mode 100644
index 0000000..62aa855
--- /dev/null
+++ b/json_decode.py
@@ -0,0 +1,35 @@
+from fuzzeddataprovider import FuzzedDataProvider
+import json
+
+LOADS = 0  # defined but never dispatched; json.loads() is not called
+DECODER_DECODE = 1
+DECODER_RAW_DECODE = 2
+
+
+# Fuzzes the _json C module's decoding paths (Modules/_json.c).
+# Exercises JSONDecoder.decode() and JSONDecoder.raw_decode() with fuzzed
+# byte input decoded as latin-1. json.loads() is not called: the target is
+# drawn from [DECODER_DECODE, DECODER_RAW_DECODE] only.
+def FuzzerRunOne(FuzzerInput):
+    # Skip empty inputs and anything larger than 0x100000 bytes (1 MiB).
+    if not 1 <= len(FuzzerInput) <= 0x100000:
+        return
+    fdp = FuzzedDataProvider(FuzzerInput)
+    target = fdp.ConsumeIntInRange(DECODER_DECODE, DECODER_RAW_DECODE)
+    remaining = fdp.remaining_bytes()
+    if remaining == 0:
+        return
+    # The payload length is itself fuzzed, capped at 10000 bytes.
+    n = fdp.ConsumeIntInRange(1, min(remaining, 10000))
+    # latin-1 maps each byte to its own code point; decoding cannot fail.
+    s = fdp.ConsumeBytes(n).decode("latin-1")
+    try:
+        dec = json.JSONDecoder()
+        if target == DECODER_DECODE:
+            dec.decode(s)
+        elif target == DECODER_RAW_DECODE:
+            dec.raw_decode(s)
+    except Exception:
+        # ValueError (incl. JSONDecodeError) and RecursionError are expected;
+        # every other Python-level exception is ignored here as well.
+        pass