diff options
| author | bors <bors@rust-lang.org> | 2013-03-25 12:04:11 -0700 |
|---|---|---|
| committer | bors <bors@rust-lang.org> | 2013-03-25 12:04:11 -0700 |
| commit | 6d4499ce4d948bf7b7b385b40e5b3536507ad3e8 (patch) | |
| tree | 8db8ed6265295920b856021d30e18596621b6082 | |
| parent | 6f4273854438e8f9b034e1342f30392f70c0eaae (diff) | |
| parent | a692777224150e2dadb5ec02c6ecd5c10ce0dd98 (diff) | |
| download | rust-6d4499ce4d948bf7b7b385b40e5b3536507ad3e8.tar.gz rust-6d4499ce4d948bf7b7b385b40e5b3536507ad3e8.zip | |
auto merge of #5424 : luqmana/rust/inline-rt, r=brson
As per #2521. Inlining seems to improve performance slightly:
| Arch | Inlined | Not Inlined |
|---|---|---|
| x86 | 13.5482 | 14.4112 |
| x86_64 | 17.4712 | 18.0696 |
(Average of 5 runs timed with `time`)
```Rust
fn foo() -> int {
int::from_str(~"28098").unwrap()
}
fn main() {
for 1000000.times {
foo();
foo();
foo();
foo();
foo();
}
}
```
All run on:
Linux 3.2.0-0.bpo.4-amd64 #1 SMP Debian 3.2.35-2~bpo60+1 x86_64 GNU/Linux
I didn't inline the MIPS and ARM bits, since I'm not as familiar with those architectures and can't test them. All green on try.
| -rw-r--r-- | src/rt/arch/arm/sp.h | 29 | ||||
| -rw-r--r-- | src/rt/arch/i386/record_sp.S | 60 | ||||
| -rw-r--r-- | src/rt/arch/i386/sp.h | 71 | ||||
| -rw-r--r-- | src/rt/arch/mips/sp.h | 29 | ||||
| -rw-r--r-- | src/rt/arch/x86_64/record_sp.S | 52 | ||||
| -rw-r--r-- | src/rt/arch/x86_64/sp.h | 71 | ||||
| -rw-r--r-- | src/rt/rust_globals.h | 16 | ||||
| -rw-r--r-- | src/rt/rust_task.h | 13 |
8 files changed, 217 insertions, 124 deletions
diff --git a/src/rt/arch/arm/sp.h b/src/rt/arch/arm/sp.h new file mode 100644 index 00000000000..cd798847607 --- /dev/null +++ b/src/rt/arch/arm/sp.h @@ -0,0 +1,29 @@ +// Copyright 2012 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +// Getting the stack pointer and getting/setting sp limit. + +#ifndef SP_H +#define SP_H + +#include "../../rust_globals.h" + +// Gets a pointer to the vicinity of the current stack pointer +extern "C" uintptr_t get_sp(); + +// Gets the pointer to the end of the Rust stack from a platform- +// specific location in the thread control block +extern "C" CDECL uintptr_t get_sp_limit(); + +// Records the pointer to the end of the Rust stack in a platform- +// specific location in the thread control block +extern "C" CDECL void record_sp_limit(void *limit); + +#endif diff --git a/src/rt/arch/i386/record_sp.S b/src/rt/arch/i386/record_sp.S index 20cafa3dffb..e69de29bb2d 100644 --- a/src/rt/arch/i386/record_sp.S +++ b/src/rt/arch/i386/record_sp.S @@ -1,60 +0,0 @@ -.text - -#if defined(__APPLE__) || defined(_WIN32) -#define RECORD_SP_LIMIT _record_sp_limit -#define GET_SP_LIMIT _get_sp_limit -#define GET_SP _get_sp -#else -#define RECORD_SP_LIMIT record_sp_limit -#define GET_SP_LIMIT get_sp_limit -#define GET_SP get_sp -#endif - -.globl RECORD_SP_LIMIT -.globl GET_SP_LIMIT -.globl GET_SP - -#if defined(__linux__) || defined(__FreeBSD__) -RECORD_SP_LIMIT: - movl 4(%esp), %eax - movl %eax, %gs:48 - ret -#endif - -#if defined(__APPLE__) -RECORD_SP_LIMIT: - movl $0x48+90*4, %eax - movl 4(%esp), %ecx - movl %ecx, %gs:(%eax) - ret -#endif - -#if 
defined(_WIN32) -RECORD_SP_LIMIT: - movl 4(%esp), %eax - movl %eax, %fs:0x14 - ret -#endif - -#if defined(__linux__) || defined(__FreeBSD__) -GET_SP_LIMIT: - movl %gs:48, %eax - ret -#endif - -#if defined(__APPLE__) -GET_SP_LIMIT: - movl $0x48+90*4, %ecx - movl %gs:(%ecx), %eax - ret -#endif - -#if defined(_WIN32) -GET_SP_LIMIT: - movl %fs:0x14, %eax - ret -#endif - -GET_SP: - movl %esp, %eax - ret diff --git a/src/rt/arch/i386/sp.h b/src/rt/arch/i386/sp.h new file mode 100644 index 00000000000..4f4c84c8175 --- /dev/null +++ b/src/rt/arch/i386/sp.h @@ -0,0 +1,71 @@ +// Copyright 2012 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +// Getting the stack pointer and getting/setting sp limit. 
+ +#ifndef SP_H +#define SP_H + +#include "../../rust_globals.h" + +// Gets a pointer to the vicinity of the current stack pointer +extern "C" ALWAYS_INLINE uintptr_t get_sp() { + uintptr_t sp; + asm volatile ( + "movl %%esp, %0" + : "=m"(sp)); + return sp; +} + +// Gets the pointer to the end of the Rust stack from a platform- +// specific location in the thread control block +extern "C" CDECL ALWAYS_INLINE uintptr_t get_sp_limit() { + uintptr_t limit; + +#if defined(__linux__) || defined(__FreeBSD__) + asm volatile ( + "movl %%gs:48, %0" + : "=r"(limit)); +#elif defined(__APPLE__) + asm volatile ( + "movl $0x48+90*4, %%ecx\n\t" + "movl %%gs:(%%ecx), %0" + : "=r"(limit) + :: "ecx"); +#elif defined(_WIN32) + asm volatile ( + "movl %%fs:0x14, %0" + : "=r"(limit)); +#endif + + return limit; +} + +// Records the pointer to the end of the Rust stack in a platform- +// specific location in the thread control block +extern "C" CDECL ALWAYS_INLINE void record_sp_limit(void *limit) { +#if defined(__linux__) || defined(__FreeBSD__) + asm volatile ( + "movl %0, %%gs:48" + :: "r"(limit)); +#elif defined(__APPLE__) + asm volatile ( + "movl $0x48+90*4, %%eax\n\t" + "movl %0, %%gs:(%%eax)" + :: "r"(limit) + : "eax"); +#elif defined(_WIN32) + asm volatile ( + "movl %0, %%fs:0x14" + :: "r"(limit)); +#endif +} + +#endif diff --git a/src/rt/arch/mips/sp.h b/src/rt/arch/mips/sp.h new file mode 100644 index 00000000000..cd798847607 --- /dev/null +++ b/src/rt/arch/mips/sp.h @@ -0,0 +1,29 @@ +// Copyright 2012 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. 
+ +// Getting the stack pointer and getting/setting sp limit. + +#ifndef SP_H +#define SP_H + +#include "../../rust_globals.h" + +// Gets a pointer to the vicinity of the current stack pointer +extern "C" uintptr_t get_sp(); + +// Gets the pointer to the end of the Rust stack from a platform- +// specific location in the thread control block +extern "C" CDECL uintptr_t get_sp_limit(); + +// Records the pointer to the end of the Rust stack in a platform- +// specific location in the thread control block +extern "C" CDECL void record_sp_limit(void *limit); + +#endif diff --git a/src/rt/arch/x86_64/record_sp.S b/src/rt/arch/x86_64/record_sp.S index 0999e19f328..e69de29bb2d 100644 --- a/src/rt/arch/x86_64/record_sp.S +++ b/src/rt/arch/x86_64/record_sp.S @@ -1,52 +0,0 @@ -.text - -#if defined(__APPLE__) || defined(_WIN32) -#define RECORD_SP_LIMIT _record_sp_limit -#define GET_SP_LIMIT _get_sp_limit -#define GET_SP _get_sp -#else -#define RECORD_SP_LIMIT record_sp_limit -#define GET_SP_LIMIT get_sp_limit -#define GET_SP get_sp -#endif - -.globl RECORD_SP_LIMIT -.globl GET_SP_LIMIT -.globl GET_SP - -#if defined(__linux__) -RECORD_SP_LIMIT: - movq %rdi, %fs:112 - ret -#elif defined(__APPLE__) -RECORD_SP_LIMIT: - movq $0x60+90*8, %rsi - movq %rdi, %gs:(%rsi) - ret -#elif defined(__FreeBSD__) -RECORD_SP_LIMIT: - movq %rdi, %fs:24 - ret -#else -RECORD_SP_LIMIT: - ret -#endif - -#if defined(__linux__) -GET_SP_LIMIT: - movq %fs:112, %rax - ret -#elif defined(__APPLE__) -GET_SP_LIMIT: - movq $0x60+90*8, %rsi - movq %gs:(%rsi), %rax - ret -#elif defined(__FreeBSD__) -GET_SP_LIMIT: - movq %fs:24, %rax - ret -#endif - -GET_SP: - movq %rsp, %rax - ret diff --git a/src/rt/arch/x86_64/sp.h b/src/rt/arch/x86_64/sp.h new file mode 100644 index 00000000000..bf011f4d019 --- /dev/null +++ b/src/rt/arch/x86_64/sp.h @@ -0,0 +1,71 @@ +// Copyright 2012 The Rust Project Developers. 
See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +// Getting the stack pointer and getting/setting sp limit. + +#ifndef SP_H +#define SP_H + +#include "../../rust_globals.h" + +// Gets a pointer to the vicinity of the current stack pointer +extern "C" ALWAYS_INLINE uintptr_t get_sp() { + uintptr_t sp; + asm volatile ( + "movq %%rsp, %0" + : "=m"(sp)); + return sp; +} + +// Gets the pointer to the end of the Rust stack from a platform- +// specific location in the thread control block +extern "C" CDECL ALWAYS_INLINE uintptr_t get_sp_limit() { + uintptr_t limit; + +#if defined(__linux__) + asm volatile ( + "movq %%fs:112, %0" + : "=r"(limit)); +#elif defined(__APPLE__) + asm volatile ( + "movq $0x60+90*8, %%rsi\n\t" + "movq %%gs:(%%rsi), %0" + : "=r"(limit) + :: "rsi"); +#elif defined(__FreeBSD__) + asm volatile ( + "movq %%fs:24, %0" + : "=r"(limit)); +#endif + + return limit; +} + +// Records the pointer to the end of the Rust stack in a platform- +// specific location in the thread control block +extern "C" CDECL ALWAYS_INLINE void record_sp_limit(void *limit) { +#if defined(__linux__) + asm volatile ( + "movq %0, %%fs:112" + :: "r"(limit)); +#elif defined(__APPLE__) + asm volatile ( + "movq $0x60+90*8, %%rsi\n\t" + "movq %0, %%gs:(%%rsi)" + :: "r"(limit) + : "rsi"); +#elif defined(__FreeBSD__) + asm volatile ( + "movq %0, %%fs:24" + :: "r"(limit)); +#endif +} + +#endif diff --git a/src/rt/rust_globals.h b/src/rt/rust_globals.h index 3d3ce7562b5..ff57af08337 100644 --- a/src/rt/rust_globals.h +++ b/src/rt/rust_globals.h @@ -11,6 +11,22 @@ #ifndef RUST_GLOBALS_H #define RUST_GLOBALS_H +#if 
defined(__cplusplus) +#define INLINE inline +#elif defined(_MSC_VER) || defined(__GNUC__) +#define INLINE __inline__ +#else +#define INLINE inline +#endif + +#if defined(__GNUC__) +#define ALWAYS_INLINE __attribute((always_inline)) INLINE +#elif defined(_MSC_VER) +#define ALWAYS_INLINE __forceinline +#else +#define ALWAYS_INLINE INLINE +#endif + #ifndef __STDC_LIMIT_MACROS #define __STDC_LIMIT_MACROS 1 #endif diff --git a/src/rt/rust_task.h b/src/rt/rust_task.h index 8c9ec172c45..00d20fefc0e 100644 --- a/src/rt/rust_task.h +++ b/src/rt/rust_task.h @@ -118,6 +118,7 @@ #include "rust_stack.h" #include "rust_type.h" #include "rust_sched_loop.h" +#include "sp.h" // The amount of extra space at the end of each stack segment, available // to the rt, compiler and dynamic linker for running small functions @@ -419,15 +420,6 @@ template <typename T> struct task_owned { } }; -// This stuff is on the stack-switching fast path - -// Records the pointer to the end of the Rust stack in a platform- -// specific location in the thread control block -extern "C" CDECL void record_sp_limit(void *limit); -extern "C" CDECL uintptr_t get_sp_limit(); -// Gets a pointer to the vicinity of the current stack pointer -extern "C" uintptr_t get_sp(); - // This is the function that switches between the C and the Rust stack by // calling another function with a single void* argument while changing the // stack pointer. It has a funny name because gdb doesn't normally like to @@ -600,9 +592,6 @@ rust_task::prev_stack() { record_stack_limit(); } -extern "C" CDECL void -record_sp_limit(void *limit); - // The LLVM-generated segmented-stack function prolog compares the amount of // stack needed for each frame to the end-of-stack pointer stored in the // TCB. As an optimization, when the frame size is less than 256 bytes, it |
