about summary refs log tree commit diff
diff options
context:
space:
mode:
authorBrian Anderson <banderson@mozilla.com>2011-12-18 02:02:35 -0800
committerBrian Anderson <banderson@mozilla.com>2011-12-18 02:02:35 -0800
commit1a1fdf34b165e25c6eca9a2729500a5805fa0c40 (patch)
tree633db68dba320d47bed96cd994770c146244f07a
parent7359fa422b9c30d3aaf08e52cad795778f824f36 (diff)
downloadrust-1a1fdf34b165e25c6eca9a2729500a5805fa0c40.tar.gz
rust-1a1fdf34b165e25c6eca9a2729500a5805fa0c40.zip
rt: Add lots of documentation to __morestack
-rw-r--r--src/rt/arch/i386/morestack.S135
-rw-r--r--src/rt/arch/x86_64/morestack.S46
2 files changed, 139 insertions, 42 deletions
diff --git a/src/rt/arch/i386/morestack.S b/src/rt/arch/i386/morestack.S
index 6e53ef850a0..3c4e95e3ac1 100644
--- a/src/rt/arch/i386/morestack.S
+++ b/src/rt/arch/i386/morestack.S
@@ -1,9 +1,70 @@
-    .text
-
-// __morestack
-//
-// LLVM generates a call to this to allocate more stack space in a function
-// prolog when we run out.
+/*
+	__morestack
+
+	This function implements stack growth using the mechanism
+	devised by Ian Lance Taylor for gccgo, described here:
+
+	http://gcc.gnu.org/wiki/SplitStacks
+
+	The Rust stack is composed of a linked list of stack segments,
+	and each stack segment contains two parts: the work area,
+	where Rust functions are allowed to execute; and the red zone,
+	where no Rust code can execute, but where short runtime
+	functions (including __morestack), the dynamic linker, signal
+	handlers, and the unwinder can run.
+
+	Each Rust function contains an LLVM-generated prologue that
+	compares the stack space required for the current function to
+	the stack space remaining in the current stack segment,
+	maintained in a platform-specific TLS slot.  The stack limit
+	is strategically maintained by the Rust runtime so that it is
+	always in place whenever a Rust function is running.
+
+	When there is not enough room to run the function, the function
+	prologue makes a call to __morestack to allocate a new stack
+	segment, copy any stack-based arguments to it, switch stacks,
+	then resume execution of the original function.
+
+	-- The __morestack calling convention --
+
+	For reasons of efficiency the __morestack calling convention
+	is bizarre. The calling function does not attempt to align the
+	stack for the call, and on x86_64 the arguments to __morestack
+	are passed in scratch registers in order to preserve the
+	original function's arguments.
+
+	Once __morestack has switched to the new stack, instead of
+	returning, it then calls into the original function, resuming
+	execution at the instruction following the call to
+	__morestack. Thus, when the original function returns it
+	actually returns to __morestack, which then deallocates the
+	stack and returns again to the original function's caller.
+
+	-- Unwinding --
+
+	All this trickery causes hell when it comes time for the
+	unwinder to navigate its way through this function. What
+	will happen is the original function will be unwound first
+	without any special effort, then the unwinder encounters
+	the __morestack frame, which is sitting just above a
+	tiny fraction of a frame (containing just a return pointer
+	and, on 32-bit, the arguments to __morestack).
+
+	We deal with this by claiming that that little bit of stack
+	is actually part of the __morestack frame, encoded as
+	DWARF call frame instructions (CFI) by .cfi assembler
+	pseudo-ops.
+
+	One final complication (that took me a week to figure out)
+	is that OS X 10.6+ uses its own 'compact unwind info',
+	an undocumented format generated by the linker from
+	the DWARF CFI. This compact unwind info doesn't correctly
+	capture the nuance of the __morestack frame, and as a
+	result all of our linking on OS X uses the -no_compact_unwind
+	flag.
+*/
+
+.text
 
 #if defined(__APPLE__)
 #define RUST_GET_TASK           L_rust_get_task$stub
@@ -51,13 +112,31 @@ MORESTACK:
 	.cfi_startproc
 #endif
 
+	// This base pointer setup differs from most in that we are
+	// telling the unwinder to consider the Canonical Frame
+	// Address (CFA) for this frame to be the value of the stack
+	// pointer prior to entry to the original function, whereas
+	// the CFA would typically be the value of the stack
+	// pointer prior to entry to this function. This will allow
+	// the unwinder to understand how to skip the tiny partial
+	// frame that the original function created by calling
+	// __morestack.
+
+	// In practical terms, our CFA is 12 bytes greater than it
+	// would normally be, accounting for the two arguments to
+	// __morestack, and an extra return address.
+
 	pushl %ebp
 #if defined(__linux__) || defined(__APPLE__)
+	// The CFA is 20 bytes above the register that it is
+	// associated with for this frame (which will be %ebp)
 	.cfi_def_cfa_offset 20
+	// %ebp is -20 bytes from the CFA
 	.cfi_offset %ebp, -20
 #endif
 	movl %esp, %ebp
 #if defined(__linux__) || defined(__APPLE__)
+	// Calculate the CFA as an offset from %ebp
 	.cfi_def_cfa_register %ebp
 #endif
 
@@ -81,17 +160,25 @@ MORESTACK:
 
 	// Save the the correct %esp value for our grandparent frame,
 	// for the unwinder
+	// FIXME: This isn't used
 	leal 20(%ebp), %eax
 	movl %eax, -4(%ebp)
 
-	// The arguments to rust_new_stack2
-	movl 56(%esp),%eax  // Size of stack arguments
+	// The arguments to upcall_new_stack
+
+	// The size of the stack arguments to copy to the new stack,
+	// one of the arguments to __morestack
+	movl 56(%esp),%eax
 	movl %eax,20(%esp)
-	leal 64(%esp),%eax // Address of stack arguments
+	// The address of the stack arguments to the original function
+	leal 64(%esp),%eax
 	movl %eax,16(%esp)
+	// The amount of stack needed for the original function,
+	// the other argument to __morestack
 	movl 52(%esp),%eax // The amount of stack needed
 	movl %eax,12(%esp)
-	movl $0, 8(%esp)   // Out pointer
+	// Out pointer to the new stack
+	movl $0, 8(%esp)
 
 #ifdef __APPLE__
 	call 1f
@@ -106,18 +193,22 @@ MORESTACK:
 	movl %eax,(%esp)
 	call UPCALL_CALL_C
 
-	movl 48(%esp),%eax          // Grab the return pointer.
-	inc  %eax    // Skip past the ret instruction in the parent fn
+	// Grab the __morestack return pointer
+	movl 48(%esp),%eax
+	// Skip past the ret instruction in the parent fn
+	inc  %eax
 
-	// Restore fastcc arguments
+	// Restore the fastcc arguments to the original function
 	movl 28(%esp), %ecx
 	movl 24(%esp), %edx
 
-	movl 8(%esp),%esp          // Switch stacks.
-	call *%eax                 // Re-enter the function that called us.
+        // Switch stacks
+	movl 8(%esp),%esp
+        // Re-enter the function that called us
+	call *%eax
 
-	// Now the function that called us has returned, so we need to delete the
-	// old stack space.
+	// Now the function that called us has returned, so we need to
+	// delete the old stack space
 
 	// Switch back to the rust stack
 	movl %ebp, %esp
@@ -127,8 +218,8 @@ MORESTACK:
 	subl $4, %esp
 
 	// Now that we're on the return path we want to avoid
-	// stomping on %eax. FIXME: Need to save and restore
-	// eax to actually preserve it across the call to delete the stack
+	// stomping on %eax. FIXME: Need to save and restore %eax to
+	// actually preserve it across the call to delete the stack
 #ifdef __APPLE__
 	call 1f
 1:	popl %ecx
@@ -144,8 +235,14 @@ MORESTACK:
 	addl $12,%esp
 
 	popl %ebp
+
+	// FIXME: I don't think these rules are necessary
+	// since the unwinder should never encounter an instruction
+	// pointer pointing here.
 #if defined(__linux__) || defined(__APPLE__)
+	// Restore the rule for how to find %ebp
 	.cfi_restore %ebp
+	// Tell the unwinder how to find the CFA in terms of %esp
 	.cfi_def_cfa %esp, 16
 #endif
 	retl $8
diff --git a/src/rt/arch/x86_64/morestack.S b/src/rt/arch/x86_64/morestack.S
index 5378a1dcc60..9cd37d656ac 100644
--- a/src/rt/arch/x86_64/morestack.S
+++ b/src/rt/arch/x86_64/morestack.S
@@ -1,9 +1,10 @@
-    .text
+/*
+	__morestack
 
-// __morestack
-//
-// LLVM generates a call to this to allocate more stack space in a functiono
-// prolog when we run out.
+	See i386/morestack.S for the lengthy, general explanation.
+*/
+
+.text
 
 #if defined(__APPLE__) || defined(_WIN32)
 #define UPCALL_NEW_STACK        _upcall_new_stack
@@ -17,25 +18,11 @@
 #define MORESTACK               __morestack
 #endif
 
-        // Naturally, nobody can agree as to
-        // which arguments should go in which
-        // registers:
-#if defined(_WIN32)
-#  define ARG0 %rcx
-#  define ARG1 %rdx
-#  define ARG2 %r8
-#else
-#  define ARG0 %rdi
-#  define ARG1 %rsi
-#  define ARG2 %rdx
-#endif
-
 .globl UPCALL_NEW_STACK
 .globl UPCALL_DEL_STACK
 .globl UPCALL_CALL_C
 .globl MORESTACK
 
-// FIXME: What about _WIN32?	
 #if defined(__linux__)
 	.hidden MORESTACK
 #else
@@ -48,24 +35,31 @@
 	.type MORESTACK,@function
 #endif
 
+
 #if defined(__linux__) || defined(__APPLE__)
 MORESTACK:
 	.cfi_startproc
-	
-	// Set up a normal backtrace
+
 	pushq %rbp
+	// The CFA is 24 bytes above the register that it will
+	// be associated with for this frame (%rbp). That is 8
+	// bytes greater than a normal frame, to allow the unwinder
+	// to skip the partial frame of the original function.
 	.cfi_def_cfa_offset 24
+	// %rbp is -24 bytes from the CFA
 	.cfi_offset %rbp, -24
 	movq %rsp, %rbp
+	// Calculate the CFA as an offset from %rbp
 	.cfi_def_cfa_register %rbp
 
 	// Save the grandparent stack pointer for the unwinder
+	// FIXME: This isn't used
 	leaq 24(%rbp), %rax
 	pushq %rax
 
 	// FIXME: libgcc also saves rax. not sure if we need to
 
-	// Save argument registers
+	// Save argument registers of the original function
 	pushq	%rdi
 	pushq	%rsi
 	pushq	%rdx
@@ -79,6 +73,8 @@ MORESTACK:
 	movq %rbp, %rcx
 	addq $24, %rcx  // Base pointer, return address x2
 
+	// The arguments to __morestack are passed in %r10 & %r11
+
 	pushq %r11 // Size of stack arguments
 	pushq %rcx // Address of stack arguments
 	pushq %r10 // The amount of stack needed
@@ -119,7 +115,8 @@ MORESTACK:
 
 	// Align the stack again
 	pushq $0
-	
+
+	// FIXME: Should preserve %rax here
 	movq UPCALL_DEL_STACK@GOTPCREL(%rip), %rsi
 	movq $0, %rdi
 #ifdef __APPLE__
@@ -131,6 +128,9 @@ MORESTACK:
 
 	addq $8, %rsp
 	popq %rbp
+	// FIXME: I don't think these rules are necessary
+	// since the unwinder should never encounter an instruction
+	// pointer pointing here.
 	.cfi_restore %rbp
 	.cfi_def_cfa %rsp, 16
 	ret