| Age | Commit message (Collapse) | Author | Lines |
|
cc #13226 (the tracking issue)
|
|
Simplify RangeInclusive::next[_back]
`match`ing on an `Option<Ordering>` seems to cause some confusion for LLVM; switching to just using comparison operators removes a few jumps from the simple `for` loops I was trying.
cc https://github.com/rust-lang/rust/issues/45222 https://github.com/rust-lang/rust/issues/28237#issuecomment-363706510
Example:
```rust
#[no_mangle]
pub fn coresum(x: std::ops::RangeInclusive<u64>) -> u64 {
let mut sum = 0;
for i in x {
sum += i ^ (i-1);
}
sum
}
```
Today:
```asm
coresum:
xor r8d, r8d
mov r9, -1
xor eax, eax
jmp .LBB0_1
.LBB0_4:
lea rcx, [rdi - 1]
xor rcx, rdi
add rax, rcx
mov rsi, rdx
mov rdi, r10
.LBB0_1:
cmp rdi, rsi
mov ecx, 1
cmovb rcx, r9
cmove rcx, r8
test rcx, rcx
mov edx, 0
mov r10d, 1
je .LBB0_4 // 1
cmp rcx, -1
jne .LBB0_5 // 2
lea r10, [rdi + 1]
mov rdx, rsi
jmp .LBB0_4 // 3
.LBB0_5:
ret
```
With this PR:
```asm
coresum:
cmp rcx, rdx
jbe .LBB0_2
xor eax, eax
ret
.LBB0_2:
xor r8d, r8d
mov r9d, 1
xor eax, eax
.p2align 4, 0x90
.LBB0_3:
lea r10, [rcx + 1]
cmp rcx, rdx
cmovae rdx, r8
cmovae r10, r9
lea r11, [rcx - 1]
xor r11, rcx
add rax, r11
mov rcx, r10
cmp r10, rdx
jbe .LBB0_3 // Just this
ret
```
<details><summary>Though using internal iteration (`.map(|i| i ^ (i-1)).sum()`) is still shorter to type, and lets the compiler unroll it</summary>
```asm
coresum_inner:
.Lcfi0:
.seh_proc coresum_inner
sub rsp, 168
.Lcfi1:
.seh_stackalloc 168
vmovdqa xmmword ptr [rsp + 144], xmm15
.Lcfi2:
.seh_savexmm 15, 144
vmovdqa xmmword ptr [rsp + 128], xmm14
.Lcfi3:
.seh_savexmm 14, 128
vmovdqa xmmword ptr [rsp + 112], xmm13
.Lcfi4:
.seh_savexmm 13, 112
vmovdqa xmmword ptr [rsp + 96], xmm12
.Lcfi5:
.seh_savexmm 12, 96
vmovdqa xmmword ptr [rsp + 80], xmm11
.Lcfi6:
.seh_savexmm 11, 80
vmovdqa xmmword ptr [rsp + 64], xmm10
.Lcfi7:
.seh_savexmm 10, 64
vmovdqa xmmword ptr [rsp + 48], xmm9
.Lcfi8:
.seh_savexmm 9, 48
vmovdqa xmmword ptr [rsp + 32], xmm8
.Lcfi9:
.seh_savexmm 8, 32
vmovdqa xmmword ptr [rsp + 16], xmm7
.Lcfi10:
.seh_savexmm 7, 16
vmovdqa xmmword ptr [rsp], xmm6
.Lcfi11:
.seh_savexmm 6, 0
.Lcfi12:
.seh_endprologue
cmp rdx, rcx
jae .LBB1_2
xor eax, eax
jmp .LBB1_13
.LBB1_2:
mov r8, rdx
sub r8, rcx
jbe .LBB1_3
cmp r8, 7
jbe .LBB1_5
mov rax, r8
and rax, -8
mov r9, r8
and r9, -8
je .LBB1_5
add rax, rcx
vmovq xmm0, rcx
vpshufd xmm0, xmm0, 68
mov ecx, 1
vmovq xmm1, rcx
vpslldq xmm1, xmm1, 8
vpaddq xmm1, xmm0, xmm1
vpxor xmm0, xmm0, xmm0
vpcmpeqd xmm11, xmm11, xmm11
vmovdqa xmm12, xmmword ptr [rip + __xmm@00000000000000010000000000000001]
vmovdqa xmm13, xmmword ptr [rip + __xmm@00000000000000030000000000000003]
vmovdqa xmm14, xmmword ptr [rip + __xmm@00000000000000050000000000000005]
vmovdqa xmm15, xmmword ptr [rip + __xmm@00000000000000080000000000000008]
mov rcx, r9
vpxor xmm4, xmm4, xmm4
vpxor xmm5, xmm5, xmm5
vpxor xmm6, xmm6, xmm6
.p2align 4, 0x90
.LBB1_9:
vpaddq xmm7, xmm1, xmmword ptr [rip + __xmm@00000000000000020000000000000002]
vpaddq xmm9, xmm1, xmmword ptr [rip + __xmm@00000000000000040000000000000004]
vpaddq xmm10, xmm1, xmmword ptr [rip + __xmm@00000000000000060000000000000006]
vpaddq xmm8, xmm1, xmm12
vpxor xmm7, xmm8, xmm7
vpaddq xmm2, xmm1, xmm13
vpxor xmm8, xmm2, xmm9
vpaddq xmm3, xmm1, xmm14
vpxor xmm3, xmm3, xmm10
vpaddq xmm2, xmm1, xmm11
vpxor xmm2, xmm2, xmm1
vpaddq xmm0, xmm2, xmm0
vpaddq xmm4, xmm7, xmm4
vpaddq xmm5, xmm8, xmm5
vpaddq xmm6, xmm3, xmm6
vpaddq xmm1, xmm1, xmm15
add rcx, -8
jne .LBB1_9
vpaddq xmm0, xmm4, xmm0
vpaddq xmm0, xmm5, xmm0
vpaddq xmm0, xmm6, xmm0
vpshufd xmm1, xmm0, 78
vpaddq xmm0, xmm0, xmm1
vmovq r10, xmm0
cmp r8, r9
jne .LBB1_6
jmp .LBB1_11
.LBB1_3:
xor r10d, r10d
jmp .LBB1_12
.LBB1_5:
xor r10d, r10d
mov rax, rcx
.p2align 4, 0x90
.LBB1_6:
lea rcx, [rax - 1]
xor rcx, rax
inc rax
add r10, rcx
cmp rdx, rax
jne .LBB1_6
.LBB1_11:
mov rcx, rdx
.LBB1_12:
lea rax, [rcx - 1]
xor rax, rcx
add rax, r10
.LBB1_13:
vmovaps xmm6, xmmword ptr [rsp]
vmovaps xmm7, xmmword ptr [rsp + 16]
vmovaps xmm8, xmmword ptr [rsp + 32]
vmovaps xmm9, xmmword ptr [rsp + 48]
vmovaps xmm10, xmmword ptr [rsp + 64]
vmovaps xmm11, xmmword ptr [rsp + 80]
vmovaps xmm12, xmmword ptr [rsp + 96]
vmovaps xmm13, xmmword ptr [rsp + 112]
vmovaps xmm14, xmmword ptr [rsp + 128]
vmovaps xmm15, xmmword ptr [rsp + 144]
add rsp, 168
ret
.seh_handlerdata
.section .text,"xr",one_only,coresum_inner
.Lcfi13:
.seh_endproc
```
</details>
|
|
`match`ing on an `Option<Ordering>` seems to cause some confusion for LLVM; switching to just using comparison operators removes a few jumps from the simple `for` loops I was trying.
|
|
Implement TrustedLen for Take<Repeat> and Take<RangeFrom>
This will allow optimization of simple `repeat(x).take(n).collect()` iterators, which are currently not vectorized and have capacity checks.
This will only support a few aggregates on `Repeat` and `RangeFrom`, which might be enough for simple cases, but doesn't optimize more complex ones. Namely, Cycle, StepBy, Filter, FilterMap, Peekable, SkipWhile, Skip, FlatMap, Fuse and Inspect are not marked `TrustedLen` when the inner iterator is infinite.
Previous discussion can be found in #47082
r? @alexcrichton
|
|
Override try_[r]fold for RangeInclusive
Because the last item needs special handling, it seems that LLVM has trouble canonicalizing the loops in external iteration. With the override, it becomes obvious that the start==end case exits the loop (as opposed to the one *after* that exiting the loop in external iteration).
Demo adapted from https://github.com/rust-lang/rust/issues/45222
```rust
#[no_mangle]
pub fn foo3r(n: u64) -> u64 {
let mut count = 0;
(0..n).for_each(|_| {
(0 ..= n).rev().for_each(|j| {
count += j;
})
});
count
}
```
<details>
<summary>Current nightly ASM, 100 lines (https://play.rust-lang.org/?gist=f5674c702c6e2045c3aab5d03763e5f6&version=nightly&mode=release)</summary>
```asm
foo3r:
pushq %rbx
.Lcfi0:
.Lcfi1:
testq %rdi, %rdi
je .LBB0_1
testb $1, %dil
jne .LBB0_4
xorl %eax, %eax
xorl %r8d, %r8d
cmpq $1, %rdi
jne .LBB0_11
jmp .LBB0_23
.LBB0_1:
xorl %eax, %eax
popq %rbx
retq
.LBB0_4:
xorl %r8d, %r8d
movq $-1, %r9
xorl %eax, %eax
movq %rdi, %r11
xorl %r10d, %r10d
jmp .LBB0_5
.LBB0_8:
addq %r11, %rax
movq %rsi, %r11
movq %rdx, %r10
.LBB0_5:
cmpq %r11, %r10
movl $1, %ecx
cmovbq %r9, %rcx
cmoveq %r8, %rcx
testq %rcx, %rcx
movl $0, %esi
movl $1, %edx
je .LBB0_8
cmpq $-1, %rcx
jne .LBB0_9
leaq -1(%r11), %rsi
movq %r10, %rdx
jmp .LBB0_8
.LBB0_9:
movl $1, %r8d
cmpq $1, %rdi
je .LBB0_23
.LBB0_11:
xorl %r9d, %r9d
movq $-1, %r10
.LBB0_12:
movq %rdi, %rsi
xorl %r11d, %r11d
jmp .LBB0_13
.LBB0_16:
addq %rsi, %rax
movq %rcx, %rsi
movq %rbx, %r11
.LBB0_13:
cmpq %rsi, %r11
movl $1, %edx
cmovbq %r10, %rdx
cmoveq %r9, %rdx
testq %rdx, %rdx
movl $0, %ecx
movl $1, %ebx
je .LBB0_16
cmpq $-1, %rdx
jne .LBB0_17
leaq -1(%rsi), %rcx
movq %r11, %rbx
jmp .LBB0_16
.LBB0_17:
movq %rdi, %rcx
xorl %r11d, %r11d
jmp .LBB0_18
.LBB0_21:
addq %rcx, %rax
movq %rsi, %rcx
movq %rbx, %r11
.LBB0_18:
cmpq %rcx, %r11
movl $1, %edx
cmovbq %r10, %rdx
cmoveq %r9, %rdx
testq %rdx, %rdx
movl $0, %esi
movl $1, %ebx
je .LBB0_21
cmpq $-1, %rdx
jne .LBB0_22
leaq -1(%rcx), %rsi
movq %r11, %rbx
jmp .LBB0_21
.LBB0_22:
addq $2, %r8
cmpq %rdi, %r8
jne .LBB0_12
.LBB0_23:
popq %rbx
retq
.Lfunc_end0:
```
</details><br>
With this PR:
```asm
foo3r:
test rcx, rcx
je .LBB3_1
lea r8, [rcx - 1]
lea rdx, [rcx - 2]
mov rax, r8
mul rdx
shld rdx, rax, 63
imul r8, r8
add r8, rcx
sub r8, rdx
imul r8, rcx
mov rax, r8
ret
.LBB3_1:
xor r8d, r8d
mov rax, r8
ret
```
|
|
Because the last item needs special handling, it seems that LLVM has trouble canonicalizing the loops in external iteration. With the override, it becomes obvious that the start==end case exits the loop (as opposed to the one *after* that exiting the loop in external iteration).
|
|
|
|
Specialize StepBy::nth
This allows optimizations of implementations of the inner iterator's `.nth` method.
|
|
|
|
|
|
|
|
|
|
Optimize slice.{r}position result bounds check
Second attempt of https://github.com/rust-lang/rust/pull/45501
Fixes https://github.com/rust-lang/rust/issues/45964
Demo: https://godbolt.org/g/N4mBHp
|
|
Add slice::ExactChunks and ::ExactChunksMut iterators
These guarantee that the requested slice size will always be returned
and any leftover elements at the end will be ignored. It allows LLVM to
get rid of bounds checks in the code using the iterator.
This is inspired by the same iterators provided by ndarray.
Fixes https://github.com/rust-lang/rust/issues/47115
I'll add unit tests for all this if the general idea and behaviour makes sense for everybody.
Also see https://github.com/rust-lang/rust/issues/47115#issuecomment-354715511 for an example what this improves.
|
|
These are basically modified copies of the chunks/chunks_mut tests.
|
|
Easy enough to do and ensures that the whole chunk is as expected
instead of just the element that was looked at before.
|
|
|
|
Re-enable num tests on wasm
Issue #42630 was closed but the tests are still ignored, supposedly they should pass now.
|
|
|
|
Add iterator method specialisations to Range*
Add specialised implementations of `max` for `Range`, and `last`, `min` and `max` for `RangeInclusive`, all of which lead to significant advantages in the generated assembly on x86.
Note that adding specialisations of `min` and `last` for `Range` led to no benefit, and adding `sum` for `Range` and `RangeInclusive` led to type inference issues (though this is possibly still worthwhile considering the performance gain).
This addresses some of the concerns in #39975.
|
|
Deprecate [T]::rotate in favor of [T]::rotate_{left,right}.
Background
==========
Slices currently have an **unstable** [`rotate`] method which rotates
elements in the slice to the _left_ N positions. [Here][tracking] is the
tracking issue for this unstable feature.
```rust
let mut a = ['a', 'b' ,'c', 'd', 'e', 'f'];
a.rotate(2);
assert_eq!(a, ['c', 'd', 'e', 'f', 'a', 'b']);
```
Proposal
========
Deprecate the [`rotate`] method and introduce `rotate_left` and
`rotate_right` methods.
```rust
let mut a = ['a', 'b' ,'c', 'd', 'e', 'f'];
a.rotate_left(2);
assert_eq!(a, ['c', 'd', 'e', 'f', 'a', 'b']);
```
```rust
let mut a = ['a', 'b' ,'c', 'd', 'e', 'f'];
a.rotate_right(2);
assert_eq!(a, ['e', 'f', 'a', 'b', 'c', 'd']);
```
Justification
=============
I used this method today for my first time and (probably because I’m a
naive westerner who reads LTR) was surprised when the docs mentioned that
elements get rotated in a left-ward direction. I was in a situation
where I needed to shift elements in a right-ward direction and had to
context switch from the main problem I was working on and think how much
to rotate left in order to accomplish the right-ward rotation I needed.
Ruby’s `Array.rotate` shifts left-ward, Python’s `deque.rotate` shifts
right-ward. Both of their implementations allow passing negative numbers
to shift in the opposite direction respectively. The current `rotate`
implementation takes an unsigned integer argument which doesn't allow
the negative number behavior.
Introducing `rotate_left` and `rotate_right` would:
- remove ambiguity about direction (alleviating need to read docs 😉)
- make it easier for people who need to rotate right
[`rotate`]: https://doc.rust-lang.org/std/primitive.slice.html#method.rotate
[tracking]: https://github.com/rust-lang/rust/issues/41891
|
|
|
|
Make double ended searchers use dependent fingers
(fixes #47175)
r? @burntsushi @alexcrichton
needs uplift to beta
|
|
|
|
|
|
|
|
|
|
For testing if the TrustedRandomAccess implementation works.
|
|
Use memchr for str::find(char)
This is a 10x improvement for searching for characters.
This also contains the patches from https://github.com/rust-lang/rust/pull/46713 . Feel free to land both separately or together.
cc @mystor @alexcrichton
r? @bluss
fixes #46693
|
|
|
|
|
|
Background
==========
Slices currently have an unstable [`rotate`] method which rotates
elements in the slice to the _left_ N positions. [Here][tracking] is the
tracking issue for this unstable feature.
```rust
let mut a = ['a', 'b' ,'c', 'd', 'e', 'f'];
a.rotate(2);
assert_eq!(a, ['c', 'd', 'e', 'f', 'a', 'b']);
```
Proposal
========
Deprecate the [`rotate`] method and introduce `rotate_left` and
`rotate_right` methods.
```rust
let mut a = ['a', 'b' ,'c', 'd', 'e', 'f'];
a.rotate_left(2);
assert_eq!(a, ['c', 'd', 'e', 'f', 'a', 'b']);
```
```rust
let mut a = ['a', 'b' ,'c', 'd', 'e', 'f'];
a.rotate_right(2);
assert_eq!(a, ['e', 'f', 'a', 'b', 'c', 'd']);
```
Justification
=============
I used this method today for my first time and (probably because I’m a
naive westerner who reads LTR) was surprised when the docs mentioned that
elements get rotated in a left-ward direction. I was in a situation
where I needed to shift elements in a right-ward direction and had to
context switch from the main problem I was working on and think how much
to rotate left in order to accomplish the right-ward rotation I needed.
Ruby’s `Array.rotate` shifts left-ward, Python’s `deque.rotate` shifts
right-ward. Both of their implementations allow passing negative numbers
to shift in the opposite direction respectively.
Introducing `rotate_left` and `rotate_right` would:
- remove ambiguity about direction (alleviating need to read docs 😉)
- make it easier for people who need to rotate right
[`rotate`]: https://doc.rust-lang.org/std/primitive.slice.html#method.rotate
[tracking]: https://github.com/rust-lang/rust/issues/41891
|
|
Always `Debug` floats with a decimal point
Fixes #30967
r? @dtolnay
|
|
Make fmt::DebugList and friends forward formatting parameters
For example, formatting slice of integers with `{:04?}` should zero-pad each integer.
This also affects every use of `#[derive(Debug)]`.
|
|
|
|
|
|
|
|
|
|
Stabilize const-calling existing const-fns in std
Fixes #46038
|
|
Remove `T: Sized` on `ptr::is_null()`
Originally from #44932 -- this is purely a revert of the last commit of that PR, which was removing some changes from the previous commits in the PR. So a revert of a revert means this is code written by @cuviper!
@mikeyhew makes a compelling case in https://github.com/rust-lang/rfcs/issues/433#issuecomment-345495505 for why this is the right way to implement `is_null` for trait objects. And the behavior for slices makes sense to me as well.
```diff
impl<T: ?Sized> *const T {
- pub fn is_null(self) -> bool where T: Sized;
+ pub fn is_null(self) -> bool;
}
impl<T: ?Sized> *mut T {
- pub fn is_null(self) -> bool where T: Sized;
+ pub fn is_null(self) -> bool;
}
|
|
Fixes #46038
|
|
For example, formatting slice of integers with `{:04?}`
should zero-pad each integer.
|
|
std: Add a new wasm32-unknown-unknown target
This commit adds a new target to the compiler: wasm32-unknown-unknown. This target is a reimagining of what it looks like to generate WebAssembly code from Rust. Instead of using Emscripten which can bring with it a weighty runtime this instead is a target which uses only the LLVM backend for WebAssembly and a "custom linker" for now which will hopefully one day be direct calls to lld.
Notable features of this target include:
* There is zero runtime footprint. The target assumes nothing exists other than the wasm32 instruction set.
* There is zero toolchain footprint beyond adding the target. No custom linker is needed, rustc contains everything.
* Very small wasm modules can be generated directly from Rust code using this target.
* Most of the standard library is stubbed out to return an error, but anything related to allocation works (aka `HashMap`, `Vec`, etc).
* Naturally, any `#[no_std]` crate should be 100% compatible with this new target.
This target is currently somewhat janky due to how linking works. The "linking" is currently unconditional whole program LTO (aka LLVM is being used as a linker). Naturally that means compiling programs is pretty slow! Eventually though this target should have a linker.
This target is also intended to be quite experimental. I'm hoping that this can act as a catalyst for further experimentation in Rust with WebAssembly. Breaking changes are very likely to land to this target, so it's not recommended to rely on it in any critical capacity yet. We'll let you know when it's "production ready".
### Building yourself
First you'll need to configure the build of LLVM and enable this target
```
$ ./configure --target=wasm32-unknown-unknown --set llvm.experimental-targets=WebAssembly
```
Next you'll want to remove any previously compiled LLVM as it needs to be rebuilt with WebAssembly support. You can do that with:
```
$ rm -rf build
```
And then you're good to go! A `./x.py build` should give you a rustc with the appropriate libstd target.
### Test support
Currently testing-wise this target is looking pretty good but isn't complete. I've got almost the entire `run-pass` test suite working with this target (lots of tests ignored, but many passing as well). The `core` test suite is [still getting LLVM bugs fixed](https://reviews.llvm.org/D39866) to get that working and will take some time. Relatively simple programs all seem to work though!
In general I've only tested this with a local fork that makes use of LLVM 5 rather than our current LLVM 4 on master. The LLVM 4 WebAssembly backend AFAIK isn't broken per se but is likely missing bug fixes available on LLVM 5. I'm hoping though that we can decouple the LLVM 5 upgrade and adding this wasm target!
### But the modules generated are huge!
It's worth noting that you may not immediately see the "smallest possible wasm module" for the input you feed to rustc. For various reasons it's very difficult to get rid of the final "bloat" in vanilla rustc (again, a real linker should fix all this). For now what you'll have to do is:
cargo install --git https://github.com/alexcrichton/wasm-gc
wasm-gc foo.wasm bar.wasm
And then `bar.wasm` should be the smallest we can get it!
---
In any case for now I'd love feedback on this, particularly on the various integration points if you've got better ideas of how to approach them!
|
|
This commit adds a new target to the compiler: wasm32-unknown-unknown. This
target is a reimagining of what it looks like to generate WebAssembly code from
Rust. Instead of using Emscripten which can bring with it a weighty runtime this
instead is a target which uses only the LLVM backend for WebAssembly and a
"custom linker" for now which will hopefully one day be direct calls to lld.
Notable features of this target include:
* There is zero runtime footprint. The target assumes nothing exists other than
the wasm32 instruction set.
* There is zero toolchain footprint beyond adding the target. No custom linker
is needed, rustc contains everything.
* Very small wasm modules can be generated directly from Rust code using this
target.
* Most of the standard library is stubbed out to return an error, but anything
related to allocation works (aka `HashMap`, `Vec`, etc).
* Naturally, any `#[no_std]` crate should be 100% compatible with this new
target.
This target is currently somewhat janky due to how linking works. The "linking"
is currently unconditional whole program LTO (aka LLVM is being used as a
linker). Naturally that means compiling programs is pretty slow! Eventually
though this target should have a linker.
This target is also intended to be quite experimental. I'm hoping that this can
act as a catalyst for further experimentation in Rust with WebAssembly. Breaking
changes are very likely to land to this target, so it's not recommended to rely
on it in any critical capacity yet. We'll let you know when it's "production
ready".
---
Currently testing-wise this target is looking pretty good but isn't complete.
I've got almost the entire `run-pass` test suite working with this target (lots
of tests ignored, but many passing as well). The `core` test suite is still
getting LLVM bugs fixed to get that working and will take some time. Relatively
simple programs all seem to work though!
---
It's worth noting that you may not immediately see the "smallest possible wasm
module" for the input you feed to rustc. For various reasons it's very difficult
to get rid of the final "bloat" in vanilla rustc (again, a real linker should
fix all this). For now what you'll have to do is:
cargo install --git https://github.com/alexcrichton/wasm-gc
wasm-gc foo.wasm bar.wasm
And then `bar.wasm` should be the smallest we can get it!
---
In any case for now I'd love feedback on this, particularly on the various
integration points if you've got better ideas of how to approach them!
|
|
This reverts commit 604f049cd5060129cf14f7bd340d442811345ea8.
This is purely a revert of cuviper's revert "Restore `T: Sized` on
`ptr::is_null`". So double revert means this is code written by cuviper!
|
|
Short-circuiting internal iteration with Iterator::try_fold & try_rfold
These are the core methods in terms of which the other methods (`fold`, `all`, `any`, `find`, `position`, `nth`, ...) can be implemented, allowing Iterator implementors to get the full goodness of internal iteration by only overriding one method (per direction).
Based off the `Try` trait, so works with both `Result` and `Option` (:tada: https://github.com/rust-lang/rust/pull/42526). The `try_fold` rustdoc examples use `Option` and the `try_rfold` ones use `Result`.
AKA continuing in the vein of PRs https://github.com/rust-lang/rust/pull/44682 & https://github.com/rust-lang/rust/pull/44856 for more of `Iterator`.
New bench following the pattern from the latter of those:
```
test iter::bench_take_while_chain_ref_sum ... bench: 1,130,843 ns/iter (+/- 25,110)
test iter::bench_take_while_chain_sum ... bench: 362,530 ns/iter (+/- 391)
```
I also ran the benches without the `fold` & `rfold` overrides to test their new default impls, with basically no change. I left them there, though, to take advantage of existing overrides and because `AlwaysOk` has some sub-optimality due to https://github.com/rust-lang/rust/issues/43278 (which 45225 should fix).
If you're wondering why there are three type parameters, see issue https://github.com/rust-lang/rust/issues/45462
Thanks for @bluss for the [original IRLO thread](https://internals.rust-lang.org/t/pre-rfc-fold-ok-is-composable-internal-iteration/4434) and the rfold PR and to @cuviper for adding so many folds, [encouraging me](https://github.com/rust-lang/rust/pull/45379#issuecomment-339424670) to make this PR, and finding a catastrophic bug in a pre-review.
|
|
Improve SliceExt::binary_search performance
Improve the performance of binary_search by reducing the number of unpredictable conditional branches in the loop. In addition improve the benchmarks to test performance in l1, l2 and l3 caches on sorted arrays with or without dups.
Before:
```
test slice::binary_search_l1 ... bench: 48 ns/iter (+/- 1)
test slice::binary_search_l2 ... bench: 63 ns/iter (+/- 0)
test slice::binary_search_l3 ... bench: 152 ns/iter (+/- 12)
test slice::binary_search_l1_with_dups ... bench: 36 ns/iter (+/- 0)
test slice::binary_search_l2_with_dups ... bench: 64 ns/iter (+/- 1)
test slice::binary_search_l3_with_dups ... bench: 153 ns/iter (+/- 6)
```
After:
```
test slice::binary_search_l1 ... bench: 15 ns/iter (+/- 0)
test slice::binary_search_l2 ... bench: 23 ns/iter (+/- 0)
test slice::binary_search_l3 ... bench: 100 ns/iter (+/- 17)
test slice::binary_search_l1_with_dups ... bench: 15 ns/iter (+/- 0)
test slice::binary_search_l2_with_dups ... bench: 23 ns/iter (+/- 0)
test slice::binary_search_l3_with_dups ... bench: 98 ns/iter (+/- 14)
```
|
|
unpredictable conditional branches in the loop. In addition improve the
benchmarks to test performance in l1, l2 and l3 caches on sorted arrays
with or without dups.
Before:
```
test slice::binary_search_l1 ... bench: 48 ns/iter (+/- 1)
test slice::binary_search_l2 ... bench: 63 ns/iter (+/- 0)
test slice::binary_search_l3 ... bench: 152 ns/iter (+/- 12)
test slice::binary_search_l1_with_dups ... bench: 36 ns/iter (+/- 0)
test slice::binary_search_l2_with_dups ... bench: 64 ns/iter (+/- 1)
test slice::binary_search_l3_with_dups ... bench: 153 ns/iter (+/- 6)
```
After:
```
test slice::binary_search_l1 ... bench: 15 ns/iter (+/- 0)
test slice::binary_search_l2 ... bench: 23 ns/iter (+/- 0)
test slice::binary_search_l3 ... bench: 100 ns/iter (+/- 17)
test slice::binary_search_l1_with_dups ... bench: 15 ns/iter (+/- 0)
test slice::binary_search_l2_with_dups ... bench: 23 ns/iter (+/- 0)
test slice::binary_search_l3_with_dups ... bench: 98 ns/iter (+/- 14)
```
|
|
This commit removes the `rand` crate from the standard library facade as
well as the `__rand` module in the standard library. Neither of these
were used in any meaningful way in the standard library itself. The only
need for randomness in libstd is to initialize the thread-local keys of
a `HashMap`, and that unconditionally used `OsRng` defined in the
standard library anyway.
The cruft of the `rand` crate and the extra `rand` support in the
standard library makes libstd slightly more difficult to port to new
platforms, namely WebAssembly which doesn't have any randomness at all
(without interfacing with JS). The purpose of this commit is to clarify
and streamline randomness in libstd, focusing on how it's only required
in one location, hashmap seeds.
Note that the `rand` crate out of tree has almost always been a drop-in
replacement for the `rand` crate in-tree, so any usage (accidental or
purposeful) of the crate in-tree should switch to the `rand` crate on
crates.io. This then also has the further benefit of avoiding
duplication (mostly) between the two crates!
|
|
Remove `T: Sized` on pointer `as_ref()` and `as_mut()`
`NonZero::is_zero()` was already casting all pointers to thin `*mut u8` to check for null. The same test on unsized fat pointers can also be used with `as_ref()` and `as_mut()` to get fat references.
(This PR formerly changed `is_null()` too, but checking just the data pointer is not obviously correct for trait objects, especially if `*const self` sorts of methods are ever allowed.)
|