Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 14 additions & 7 deletions src/execve/exit.c
Original file line number Diff line number Diff line change
Expand Up @@ -139,9 +139,14 @@ static int bind_proc_pid_auxv(const Tracee *ptracee)

/**
* Convert @mappings into load @script statements at the given @cursor
* position. This function returns the new cursor position.
* position. When @is_pic is true the PIE action variants are emitted
* so the loader will let the kernel choose the base address for the
* first segment (avoiding hardcoded EXEC_PIC_ADDRESS / INTERP_PIC_ADDRESS
* that may collide with vdso/kshare guard zones on some ARM64 kernels),
* then MAP_FIXED subsequent segments at the correct offsets.
* This function returns the new cursor position.
*/
static void *transcript_mappings(void *cursor, const Mapping *mappings)
static void *transcript_mappings(void *cursor, const Mapping *mappings, bool is_pic)
{
size_t nb_mappings;
size_t i;
Expand All @@ -151,9 +156,9 @@ static void *transcript_mappings(void *cursor, const Mapping *mappings)
LoadStatement *statement = cursor;

if ((mappings[i].flags & MAP_ANONYMOUS) != 0)
statement->action = LOAD_ACTION_MMAP_ANON;
statement->action = is_pic ? LOAD_ACTION_MMAP_PIC_ANON : LOAD_ACTION_MMAP_ANON;
else
statement->action = LOAD_ACTION_MMAP_FILE;
statement->action = is_pic ? LOAD_ACTION_MMAP_PIC_FILE : LOAD_ACTION_MMAP_FILE;

statement->mmap.addr = mappings[i].addr;
statement->mmap.length = mappings[i].length;
Expand Down Expand Up @@ -208,7 +213,9 @@ static int transfer_load_script(Tracee *tracee)
needs_executable_stack = (tracee->load_info->needs_executable_stack
|| ( tracee->load_info->interp != NULL
&& tracee->load_info->interp->needs_executable_stack));

bool exec_is_pic = IS_POSITION_INDENPENDANT(tracee->load_info->elf_header);
bool interp_is_pic = tracee->load_info->interp != NULL
&& IS_POSITION_INDENPENDANT(tracee->load_info->interp->elf_header);
/* String addresses are required to generate the load script,
* for "open" actions. Since I want to generate it in one
* pass, these strings will be put right below the current
Expand Down Expand Up @@ -269,7 +276,7 @@ static int transfer_load_script(Tracee *tracee)
cursor += LOAD_STATEMENT_SIZE(*statement, open);

/* Load script statements: mmap. */
cursor = transcript_mappings(cursor, tracee->load_info->mappings);
cursor = transcript_mappings(cursor, tracee->load_info->mappings, exec_is_pic);

if (tracee->load_info->interp != NULL) {
/* Load script statement: open. */
Expand All @@ -280,7 +287,7 @@ static int transfer_load_script(Tracee *tracee)
cursor += LOAD_STATEMENT_SIZE(*statement, open);

/* Load script statements: mmap. */
cursor = transcript_mappings(cursor, tracee->load_info->interp->mappings);
cursor = transcript_mappings(cursor, tracee->load_info->interp->mappings, interp_is_pic);

entry_point = ELF_FIELD(tracee->load_info->interp->elf_header, entry);
}
Expand Down
91 changes: 84 additions & 7 deletions src/loader/loader.c
Original file line number Diff line number Diff line change
Expand Up @@ -43,9 +43,9 @@
# define MMAP_OFFSET_SHIFT 0
#endif

#define FATAL() do { \
SYSCALL(EXIT, 1, 182); \
__builtin_unreachable(); \
#define FATAL() do { \
SYSCALL(EXIT, 1, 182); \
__builtin_unreachable(); \
} while (0)

#define unlikely(expr) __builtin_expect(!!(expr), 0)
Expand Down Expand Up @@ -113,6 +113,14 @@ void _start(void *cursor)
bool traced = false;
bool reset_at_base = true;
word_t at_base = 0;
/* PIE relocation: the first segment of each PIE binary is mapped with
* addr=0 to let the kernel choose a conflict-free base. pic_delta is
* the difference between the kernel-chosen base and the original
* planned address; it is applied to all subsequent segments and to
* entry_point / auxv values at startup. */
word_t pic_delta = 0;
word_t exec_pic_delta = 0;
bool has_interp = false;

word_t fd = -1;
word_t status;
Expand All @@ -122,6 +130,9 @@ void _start(void *cursor)

switch (stmt->action) {
case LOAD_ACTION_OPEN_NEXT:
exec_pic_delta = pic_delta;
pic_delta = 0;
has_interp = true;
status = SYSCALL(CLOSE, 1, fd);
if (unlikely((int) status < 0))
FATAL();
Expand Down Expand Up @@ -160,6 +171,41 @@ void _start(void *cursor)
cursor += LOAD_STATEMENT_SIZE(*stmt, mmap);
break;

case LOAD_ACTION_MMAP_PIC_FILE:
if (reset_at_base) {
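/* First segment of a PIE binary: pass addr=0 so the kernel
 * chooses the base address instead of a hardcoded one.
 * NOTE(review): only this segment's length is requested here,
 * so unless it already spans the whole image, the address range
 * needed by later segments is not reserved -- confirm. */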
status = SYSCALL(MMAP, 6, 0, stmt->mmap.length,
stmt->mmap.prot, MAP_PRIVATE, fd,
stmt->mmap.offset >> MMAP_OFFSET_SHIFT);
/* Use IS_ERR_VALUE-style check: on 32-bit targets,
* valid high addresses (e.g. 0xb7...) look negative
* when cast to long; mmap errors are in [-4095,-1]. */
if (unlikely(status >= (word_t)-4095))
FATAL();
pic_delta = status - stmt->mmap.addr;
Comment thread
Ebola-Chan-bot marked this conversation as resolved.
at_base = status;
reset_at_base = false;
} else {
/* Subsequent segments: MAP_FIXED at the delta-adjusted
 * address, i.e. the planned address shifted by pic_delta so
 * it keeps its offset from the kernel-chosen base. */
word_t adjusted = stmt->mmap.addr + pic_delta;
status = SYSCALL(MMAP, 6, adjusted, stmt->mmap.length,
stmt->mmap.prot, MAP_PRIVATE | MAP_FIXED, fd,
stmt->mmap.offset >> MMAP_OFFSET_SHIFT);
if (unlikely(status != adjusted))
FATAL();
}

if (stmt->mmap.clear_length != 0) {
word_t actual = stmt->mmap.addr + pic_delta;
clear(actual + stmt->mmap.length - stmt->mmap.clear_length,
actual + stmt->mmap.length);
}

cursor += LOAD_STATEMENT_SIZE(*stmt, mmap);
break;

case LOAD_ACTION_MMAP_ANON:
status = SYSCALL(MMAP, 6, stmt->mmap.addr, stmt->mmap.length,
stmt->mmap.prot, MAP_PRIVATE | MAP_FIXED | MAP_ANONYMOUS, -1, 0);
Expand All @@ -169,6 +215,29 @@ void _start(void *cursor)
cursor += LOAD_STATEMENT_SIZE(*stmt, mmap);
break;

case LOAD_ACTION_MMAP_PIC_ANON:
if (reset_at_base) {
/* First segment (anon) of a PIE binary: let the
* kernel choose the base address. */
status = SYSCALL(MMAP, 6, 0, stmt->mmap.length,
stmt->mmap.prot, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
/* IS_ERR_VALUE-style: see LOAD_ACTION_MMAP_PIC_FILE. */
if (unlikely(status >= (word_t)-4095))
FATAL();
pic_delta = status - stmt->mmap.addr;
at_base = status;
Comment thread
Ebola-Chan-bot marked this conversation as resolved.
reset_at_base = false;
} else {
word_t adjusted = stmt->mmap.addr + pic_delta;
status = SYSCALL(MMAP, 6, adjusted, stmt->mmap.length,
stmt->mmap.prot, MAP_PRIVATE | MAP_FIXED | MAP_ANONYMOUS, -1, 0);
if (unlikely(status != adjusted))
FATAL();
}

cursor += LOAD_STATEMENT_SIZE(*stmt, mmap);
break;

case LOAD_ACTION_MAKE_STACK_EXEC:
SYSCALL(MPROTECT, 3,
stmt->make_stack_exec.start, 1,
Expand All @@ -185,6 +254,14 @@ void _start(void *cursor)
word_t *cursor2 = (word_t *) stmt->start.stack_pointer;
const word_t argc = cursor2[0];
const word_t at_execfn = cursor2[1];
/* Apply PIE relocation deltas computed at mmap time.
* entry_point comes from the interp (if present) or exec,
* so it uses the current pic_delta. at_phdr and at_entry
* always reference the executable, so they use its delta. */
const word_t eff_exec_delta = has_interp ? exec_pic_delta : pic_delta;
const word_t actual_entry = stmt->start.entry_point + pic_delta;
const word_t actual_at_phdr = stmt->start.at_phdr + eff_exec_delta;
const word_t actual_at_entry = stmt->start.at_entry + eff_exec_delta;
word_t name;

status = SYSCALL(CLOSE, 1, fd);
Expand All @@ -209,7 +286,7 @@ void _start(void *cursor)
do {
switch (cursor2[0]) {
case AT_PHDR:
cursor2[1] = stmt->start.at_phdr;
cursor2[1] = actual_at_phdr;
break;

case AT_PHENT:
Expand All @@ -221,7 +298,7 @@ void _start(void *cursor)
break;

case AT_ENTRY:
cursor2[1] = stmt->start.at_entry;
cursor2[1] = actual_at_entry;
break;

case AT_BASE:
Expand All @@ -248,9 +325,9 @@ void _start(void *cursor)
if (unlikely(traced))
SYSCALL(EXECVE, 6, 1,
stmt->start.stack_pointer,
stmt->start.entry_point, 2, 3, 4);
actual_entry, 2, 3, 4);
else
BRANCH(stmt->start.stack_pointer, stmt->start.entry_point);
BRANCH(stmt->start.stack_pointer, actual_entry);
FATAL();
}

Expand Down
6 changes: 6 additions & 0 deletions src/loader/script.h
Original file line number Diff line number Diff line change
Expand Up @@ -74,5 +74,11 @@ typedef struct load_statement LoadStatement;
#define LOAD_ACTION_MAKE_STACK_EXEC 4
#define LOAD_ACTION_START_TRACED 5
#define LOAD_ACTION_START 6
/* PIE variants: loader performs mmap(addr=0) on the first segment to obtain a
* kernel-assigned base address, then applies a fixed delta to all subsequent
* segments. This avoids EXEC_PIC_ADDRESS / INTERP_PIC_ADDRESS falling inside
* device-specific protection zones (e.g. ~2 GB vdso guard region on some ARM64 kernels). */
/* File-backed PIE mapping: the loader maps the first such statement at a
 * kernel-chosen address and the following ones MAP_FIXED at addr + delta,
 * all from the currently open file descriptor. */
#define LOAD_ACTION_MMAP_PIC_FILE 7
/* Anonymous PIE mapping: same placement scheme, but mapped with
 * MAP_ANONYMOUS and no file descriptor. */
#define LOAD_ACTION_MMAP_PIC_ANON 8

#endif /* SCRIPT */
14 changes: 13 additions & 1 deletion src/tracee/event.c
Original file line number Diff line number Diff line change
Expand Up @@ -404,17 +404,29 @@ int handle_tracee_event(Tracee *tracee, int tracee_status)
signal = 0;

if (WIFEXITED(tracee_status)) {
/* No vpid==1 guard here (unlike WIFSIGNALED below): this is
* upstream behavior, and normal WIFEXITED ordering has root
* exiting last — there is no bulk-SIGKILL cleanup phase that
* could overwrite the status as with WIFSIGNALED. */
last_exit_status = WEXITSTATUS(tracee_status);
VERBOSE(tracee, 1,
"vpid %" PRIu64 ": exited with status %d",
tracee->vpid, last_exit_status);
terminate_tracee(tracee);
}
else if (WIFSIGNALED(tracee_status)) {
int termsig = WTERMSIG(tracee_status);
check_architecture(tracee);
/* Only the root tracee (vpid 1) should determine proot's exit
* code. Child tracees killed during cleanup (e.g. SIGKILL after
* root exits) must not overwrite it — upstream never set
* last_exit_status in WIFSIGNALED at all, and blindly doing so
* causes proot to return 137 when children are reaped. */
if (tracee->vpid == 1)
last_exit_status = 128 + termsig;
VERBOSE(tracee, (int) (tracee->vpid != 1),
Comment thread
Ebola-Chan-bot marked this conversation as resolved.
"vpid %" PRIu64 ": terminated with signal %d",
tracee->vpid, WTERMSIG(tracee_status));
tracee->vpid, termsig);
terminate_tracee(tracee);
}
else if (WIFSTOPPED(tracee_status)) {
Expand Down