Optimize `escape_ascii` using a lookup table
Based upon my suggestion here: https://github.com/rust-lang/rust/pull/125340#issuecomment-2130441817
Effectively, we can take advantage of the fact that ASCII only needs 7 bits, using the eighth bit to store whether the value should be escaped or not. This adds a 256-byte lookup table, but 256 bytes *should* be small enough that very few people will mind, in my admittedly contestable opinion.
The generated assembly isn't clearly better (although it has fewer branches), so I benchmarked on three inputs: a random 200 KiB buffer, `/bin/cat`, and this repo's `Cargo.toml`. In all cases, the generated code ran faster on my machine (an old i7-8700).
If you want to try the benchmarking code yourself:
<details><summary>Criterion code below. Replace <code>/home/ltdk/rustsrc</code> with the appropriate directory.</summary>
```rust
#![feature(ascii_char)]
#![feature(ascii_char_variants)]
#![feature(const_option)]
#![feature(let_chains)]
use core::ascii;
use core::ops::Range;
use criterion::{criterion_group, criterion_main, Criterion};
use rand::{thread_rng, Rng};
const HEX_DIGITS: [ascii::Char; 16] = *b"0123456789abcdef".as_ascii().unwrap();
#[inline]
const fn backslash<const N: usize>(a: ascii::Char) -> ([ascii::Char; N], Range<u8>) {
const { assert!(N >= 2) };
let mut output = [ascii::Char::Null; N];
output[0] = ascii::Char::ReverseSolidus;
output[1] = a;
(output, 0..2)
}
#[inline]
const fn hex_escape<const N: usize>(byte: u8) -> ([ascii::Char; N], Range<u8>) {
const { assert!(N >= 4) };
let mut output = [ascii::Char::Null; N];
let hi = HEX_DIGITS[(byte >> 4) as usize];
let lo = HEX_DIGITS[(byte & 0xf) as usize];
output[0] = ascii::Char::ReverseSolidus;
output[1] = ascii::Char::SmallX;
output[2] = hi;
output[3] = lo;
(output, 0..4)
}
#[inline]
const fn verbatim<const N: usize>(a: ascii::Char) -> ([ascii::Char; N], Range<u8>) {
const { assert!(N >= 1) };
let mut output = [ascii::Char::Null; N];
output[0] = a;
(output, 0..1)
}
/// Escapes an ASCII character.
///
/// Returns a buffer and the length of the escaped representation.
const fn escape_ascii_old<const N: usize>(byte: u8) -> ([ascii::Char; N], Range<u8>) {
const { assert!(N >= 4) };
match byte {
b'\t' => backslash(ascii::Char::SmallT),
b'\r' => backslash(ascii::Char::SmallR),
b'\n' => backslash(ascii::Char::SmallN),
b'\\' => backslash(ascii::Char::ReverseSolidus),
b'\'' => backslash(ascii::Char::Apostrophe),
b'\"' => backslash(ascii::Char::QuotationMark),
0x00..=0x1F => hex_escape(byte),
_ => match ascii::Char::from_u8(byte) {
Some(a) => verbatim(a),
None => hex_escape(byte),
},
}
}
/// Escapes an ASCII character.
///
/// Returns a buffer and the length of the escaped representation.
const fn escape_ascii_new<const N: usize>(byte: u8) -> ([ascii::Char; N], Range<u8>) {
/// Lookup table that determines how to display each character.
///
/// Since ASCII characters will always be 7 bits, we can exploit this to store the 8th bit to
/// indicate whether the result is escaped or unescaped.
///
/// We additionally use 0x80 (escaped NUL character) to indicate hex-escaped bytes, since
/// escaped NUL will not occur.
const LOOKUP: [u8; 256] = {
let mut arr = [0; 256];
let mut idx = 0;
loop {
arr[idx as usize] = match idx {
// use 8th bit to indicate escaped
b'\t' => 0x80 | b't',
b'\r' => 0x80 | b'r',
b'\n' => 0x80 | b'n',
b'\\' => 0x80 | b'\\',
b'\'' => 0x80 | b'\'',
b'"' => 0x80 | b'"',
// use NUL to indicate hex-escaped
0x00..=0x1F | 0x7F..=0xFF => 0x80 | b'\0',
_ => idx,
};
if idx == 255 {
break;
}
idx += 1;
}
arr
};
let lookup = LOOKUP[byte as usize];
// 8th bit indicates escape
let lookup_escaped = lookup & 0x80 != 0;
// SAFETY: We explicitly mask out the eighth bit to get a 7-bit ASCII character.
let lookup_ascii = unsafe { ascii::Char::from_u8_unchecked(lookup & 0x7F) };
if lookup_escaped {
// NUL indicates hex-escaped
if matches!(lookup_ascii, ascii::Char::Null) {
hex_escape(byte)
} else {
backslash(lookup_ascii)
}
} else {
verbatim(lookup_ascii)
}
}
fn escape_bytes(bytes: &[u8], f: impl Fn(u8) -> ([ascii::Char; 4], Range<u8>)) -> Vec<ascii::Char> {
let mut vec = Vec::new();
for b in bytes {
let (buf, range) = f(*b);
vec.extend_from_slice(&buf[range.start as usize..range.end as usize]);
}
vec
}
pub fn criterion_benchmark(c: &mut Criterion) {
let mut group = c.benchmark_group("escape_ascii");
group.sample_size(1000);
let rand_200k = &mut [0; 200 * 1024];
thread_rng().fill(&mut rand_200k[..]);
let cat = include_bytes!("/bin/cat");
let cargo_toml = include_bytes!("/home/ltdk/rustsrc/Cargo.toml");
group.bench_function("old_rand", |b| {
b.iter(|| escape_bytes(rand_200k, escape_ascii_old));
});
group.bench_function("new_rand", |b| {
b.iter(|| escape_bytes(rand_200k, escape_ascii_new));
});
group.bench_function("old_bin", |b| {
b.iter(|| escape_bytes(cat, escape_ascii_old));
});
group.bench_function("new_bin", |b| {
b.iter(|| escape_bytes(cat, escape_ascii_new));
});
group.bench_function("old_cargo_toml", |b| {
b.iter(|| escape_bytes(cargo_toml, escape_ascii_old));
});
group.bench_function("new_cargo_toml", |b| {
b.iter(|| escape_bytes(cargo_toml, escape_ascii_new));
});
group.finish();
}
criterion_group!(benches, criterion_benchmark);
criterion_main!(benches);
```
</details>
My benchmark results:
```
escape_ascii/old_rand time: [1.6965 ms 1.7006 ms 1.7053 ms]
Found 22 outliers among 1000 measurements (2.20%)
4 (0.40%) high mild
18 (1.80%) high severe
escape_ascii/new_rand time: [1.6749 ms 1.6953 ms 1.7158 ms]
Found 38 outliers among 1000 measurements (3.80%)
38 (3.80%) high mild
escape_ascii/old_bin time: [224.59 µs 225.40 µs 226.33 µs]
Found 39 outliers among 1000 measurements (3.90%)
17 (1.70%) high mild
22 (2.20%) high severe
escape_ascii/new_bin time: [164.86 µs 165.63 µs 166.58 µs]
Found 107 outliers among 1000 measurements (10.70%)
43 (4.30%) high mild
64 (6.40%) high severe
escape_ascii/old_cargo_toml
time: [23.397 µs 23.699 µs 24.014 µs]
Found 204 outliers among 1000 measurements (20.40%)
21 (2.10%) high mild
183 (18.30%) high severe
escape_ascii/new_cargo_toml
time: [16.404 µs 16.438 µs 16.483 µs]
Found 88 outliers among 1000 measurements (8.80%)
56 (5.60%) high mild
32 (3.20%) high severe
```
Random: 1.7006ms => 1.6953ms (<1% speedup)
Binary: 225.40µs => 165.63µs (26% speedup)
Text: 23.699µs => 16.438µs (30% speedup)
|
|
This makes the following API stable in const contexts:
```rust
impl<T> Option<T> {
    pub const fn as_mut(&mut self) -> Option<&mut T>;
    pub const fn expect(self, msg: &str) -> T;
    pub const fn unwrap(self) -> T;
    pub const unsafe fn unwrap_unchecked(self) -> T;
    pub const fn take(&mut self) -> Option<T>;
    pub const fn replace(&mut self, value: T) -> Option<T>;
}

impl<T> Option<&T> {
    pub const fn copied(self) -> Option<T>
    where
        T: Copy;
}

impl<T> Option<&mut T> {
    pub const fn copied(self) -> Option<T>
    where
        T: Copy;
}

impl<T, E> Option<Result<T, E>> {
    pub const fn transpose(self) -> Result<Option<T>, E>;
}

impl<T> Option<Option<T>> {
    pub const fn flatten(self) -> Option<T>;
}
```
The following functions make use of the unstable
`const_precise_live_drops` feature:
- `expect`
- `unwrap`
- `unwrap_unchecked`
- `transpose`
- `flatten`
Fixes: <https://github.com/rust-lang/rust/issues/67441>
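For illustration, a minimal sketch of what this stabilization enables in const context (values here are arbitrary):
```rust
// All of these method calls were rejected in const items before this change.
const UNWRAPPED: i32 = Some(5).unwrap();
const FLATTENED: Option<u8> = Some(Some(7u8)).flatten();
const TRANSPOSED: Result<Option<u8>, ()> = Some(Ok(7u8)).transpose();

fn main() {
    assert_eq!(UNWRAPPED, 5);
    assert_eq!(FLATTENED, Some(7));
    assert_eq!(TRANSPOSED, Ok(Some(7)));
}
```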
|
|
Stabilize const `ptr::write*` and `mem::replace`
Since `const_mut_refs` and `const_refs_to_cell` have been stabilized, we may now also stabilize the ability to write to places during const evaluation inside our library API. This PR therefore proposes the `const fn` versions of `ptr::write` and its variants, which in turn allows us to stabilize `mem::replace` and `ptr::replace`.
- const `mem::replace`: https://github.com/rust-lang/rust/issues/83164#issuecomment-2338660862
- const `ptr::write{,_bytes,_unaligned}`: https://github.com/rust-lang/rust/issues/86302#issuecomment-2330275266
Their implementation requires an additional internal stabilization of `const_intrinsic_forget`, on which `*::write*` and thus `*::replace` depend. We therefore const-stabilize the internal intrinsics `forget`, `write_bytes`, and `write_via_move`.
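As a hedged sketch (illustrative values, assuming a toolchain where these stabilizations have landed), this is the kind of code that becomes legal:
```rust
// Mutate a place during const evaluation via `mem::replace` and `ptr::write`.
const SWAPPED: (i32, i32) = {
    let mut slot = 1;
    let old = core::mem::replace(&mut slot, 2);
    // SAFETY: `&mut slot` coerces to a valid, aligned `*mut i32`.
    unsafe { core::ptr::write(&mut slot, 3) };
    (old, slot)
};

fn main() {
    assert_eq!(SWAPPED, (1, 3));
}
```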
|
|
Depends on stabilizing `const_ptr_write`.
Const-stabilizes:
- `core::mem::replace`
- `core::ptr::replace`
|
|
Const-stabilizes:
- `write`
- `write_bytes`
- `write_unaligned`
In the following paths:
- `core::ptr`
- `core::ptr::NonNull`
- pointer `<*mut T>`
Const-stabilizes the internal `core::intrinsics`:
- `write_bytes`
- `write_via_move`
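A minimal sketch of what const `write_bytes` permits (illustrative only, and nothing invalid is ever dereferenced):
```rust
// Zero a buffer during const evaluation.
const ZEROED: [u8; 4] = {
    let mut buf = [0xFF_u8; 4];
    let p = &mut buf as *mut [u8; 4] as *mut u8;
    // SAFETY: `p` points to exactly 4 valid, writable bytes.
    unsafe { core::ptr::write_bytes(p, 0, 4) };
    buf
};

fn main() {
    assert_eq!(ZEROED, [0; 4]);
}
```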
|
|
This is an implicit requirement of stabilizing `const_ptr_write`.
Const-stabilizes the internal `core::intrinsics`:
- `forget`
|
|
stabilize duration_consts_float
Waiting for FCP in https://github.com/rust-lang/rust/issues/72440 to pass.
`as_millis_f32` and `as_millis_f64` are not stable at all yet, so I moved their const-stability together with their regular stability (tracked at https://github.com/rust-lang/rust/issues/122451).
Fixes https://github.com/rust-lang/rust/issues/72440
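For illustration, assuming `as_secs_f64` is among the const-stabilized methods:
```rust
use core::time::Duration;

// Float-returning Duration accessors become usable in const items.
const FRAME: Duration = Duration::from_millis(16);
const FRAME_SECS: f64 = FRAME.as_secs_f64();

fn main() {
    assert!((FRAME_SECS - 0.016).abs() < 1e-12);
}
```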
|
|
intrinsics fmuladdf{32,64}: expose llvm.fmuladd.* semantics
Add intrinsics `fmuladd{f32,f64}`. This computes `(a * b) + c`, to be fused if the code generator determines that (i) the target instruction set has support for a fused operation, and (ii) that the fused operation is more efficient than the equivalent, separate pair of `mul` and `add` instructions.
https://llvm.org/docs/LangRef.html#llvm-fmuladd-intrinsic
The codegen_cranelift backend uses the `fma` function from libc, which is a correct implementation but lacks the desired performance semantics. I think this requires an update to Cranelift to expose a suitable instruction in its IR.
I have not tested with codegen_gcc, but it should behave the same way (using `fma` from libc).
---
This topic has been discussed a few times on Zulip and was suggested, for example, by `@workingjubilee` in [Effect of fma disabled](https://rust-lang.zulipchat.com/#narrow/stream/122651-general/topic/Effect.20of.20fma.20disabled/near/274179331).
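To illustrate the semantic difference (this is a contrast sketch, not the intrinsic itself):
```rust
fn fused(a: f64, b: f64, c: f64) -> f64 {
    // `mul_add` guarantees a single rounding step (true fma semantics),
    // which can be slow on targets without hardware FMA.
    a.mul_add(b, c)
}

fn unfused(a: f64, b: f64, c: f64) -> f64 {
    // Two rounding steps; the backend may or may not fuse this.
    a * b + c
}

fn main() {
    println!("{} {}", fused(0.1, 0.2, 0.3), unfused(0.1, 0.2, 0.3));
}
```
The new intrinsics sit between these two: they compute `(a * b) + c` and leave the fuse-or-not decision to the code generator.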
|
|
Stabilise `const_char_encode_utf8`.
Closes: #130512
This PR stabilises the `const_char_encode_utf8` feature gate (i.e. support for `char::encode_utf8` in const scenarios).
Note that the linked tracking issue is currently awaiting FCP.
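A minimal sketch of the now-stable capability:
```rust
// `char::encode_utf8` can now run during const evaluation.
const fn utf8_len(c: char) -> usize {
    let mut buf = [0u8; 4];
    c.encode_utf8(&mut buf).len()
}

fn main() {
    const TWO: usize = utf8_len('é');
    assert_eq!(TWO, 2);
}
```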
|
|
stabilize const_result
Waiting for FCP to complete in https://github.com/rust-lang/rust/issues/82814
Fixes #82814
|
|
Stabilize `debug_more_non_exhaustive`
Fixes: https://github.com/rust-lang/rust/issues/127942
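For illustration, a hedged sketch assuming the feature adds `finish_non_exhaustive` to the list/set/map/tuple debug builders (it previously existed only on `DebugStruct`):
```rust
use std::fmt;

struct Preview(Vec<u32>);

impl fmt::Debug for Preview {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Prints e.g. `[1, 2, 3, ..]` for longer vectors.
        f.debug_list().entries(self.0.iter().take(3)).finish_non_exhaustive()
    }
}

fn main() {
    println!("{:?}", Preview(vec![1, 2, 3, 4, 5]));
}
```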
|
|
Add intrinsics `fmuladd{f16,f32,f64,f128}`. This computes `(a * b) +
c`, to be fused if the code generator determines that (i) the target
instruction set has support for a fused operation, and (ii) that the
fused operation is more efficient than the equivalent, separate pair
of `mul` and `add` instructions.
https://llvm.org/docs/LangRef.html#llvm-fmuladd-intrinsic
Miri support is included for f32 and f64.
The codegen_cranelift backend uses the `fma` function from libc, which
is a correct implementation but lacks the desired performance
semantics. I think this requires an update to Cranelift to expose a
suitable instruction in its IR.
I have not tested with codegen_gcc, but it should behave the same
way (using `fma` from libc).
|
|
Fixing rustdoc for LayoutError.
I started reading the std lib from start to finish and noticed that this rustdoc comment wasn't correct.
|
|
Stabilize const `{slice,array}::from_mut`
This PR stabilizes the following APIs as const stable as of Rust 1.83:
```rust
// core::array
pub const fn from_mut<T>(s: &mut T) -> &mut [T; 1];
// core::slice
pub const fn from_mut<T>(s: &mut T) -> &mut [T];
```
This is made possible by `const_mut_refs` being stabilized (yay).
Tracking issue: #90206
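A minimal sketch of the const usage this unlocks:
```rust
// A mutable one-element view of a single value, in const context.
const LEN: usize = {
    let mut x = 42u8;
    core::slice::from_mut(&mut x).len()
};

fn main() {
    assert_eq!(LEN, 1);
}
```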
|
|
AngelicosPhosphoros:better_doc_for_slice_slicing_at_ends, r=cuviper
Add docs about slicing slices at the ends
Closes https://github.com/rust-lang/rust/issues/60783
|
|
Library: Rename "object safe" to "dyn compatible"
Completed T-lang FCP: https://github.com/rust-lang/lang-team/issues/286#issuecomment-2338905118.
Tracking issue: https://github.com/rust-lang/rust/issues/130852
Regarding https://github.com/rust-lang/rust/labels/relnotes, I guess I will manually open a https://github.com/rust-lang/rust/labels/relnotes-tracking-issue since this change affects everything (compiler, library, tools, docs, books, everyday language).
r? ghost
|
|
Closes https://github.com/rust-lang/rust/issues/60783
|
|
...because the checks in offset found bugs in a crater run.
|
|
Fix typo in primitive_docs.rs
typo introduced in #129559
|
|
Add a Lint for Pointer to Integer Transmutes in Consts
Fixes #87525
This PR adds a MirLint for pointer to integer transmutes in const functions and associated consts. The implementation closely follows this comment: https://github.com/rust-lang/rust/pull/85769#issuecomment-880969112. More details about the implementation can be found in the comments.
Note: This could break some sound code as mentioned by RalfJung in https://github.com/rust-lang/rust/pull/85769#issuecomment-886491680:
> ... technically const-code could transmute/cast an int to a ptr and then transmute it back and that would be correct -- so the lint will deny some sound code. Does not seem terribly likely though.
References:
1. https://doc.rust-lang.org/std/mem/fn.transmute.html
2. https://doc.rust-lang.org/reference/items/associated-items.html#associated-constants
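For illustration, the shape of code the lint targets (the exact diagnostic wording is an assumption):
```rust
const fn leak_addr(p: *const u8) -> usize {
    // The lint fires here: a pointer-to-integer transmute yields a
    // value that const evaluation cannot meaningfully use.
    unsafe { std::mem::transmute(p) }
}

fn main() {
    let x = 0u8;
    println!("{}", leak_addr(&x)); // fine at runtime, linted in const fn
}
```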
|
|
make Cell unstably const
Now that we can do interior mutability in `const`, most of the Cell API can be `const fn`. :) The main exception is `set`, because it drops the old value. So from const context one has to use `replace`, which delegates the responsibility for dropping to the caller.
Tracking issue: https://github.com/rust-lang/rust/issues/131283
`as_array_of_cells` is itself still unstable, so I added the const-ness to the feature gate for that function and not to `const_cell`. Cc #88248.
r? libs-api
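A nightly-only sketch of the resulting API surface (gate name per the tracking issue):
```rust
#![feature(const_cell)]
use core::cell::Cell;

const fn bump(counter: &Cell<u32>) -> u32 {
    // `set` is not const because it drops the old value; `replace`
    // hands the old value back to the caller instead.
    counter.replace(counter.get() + 1)
}

fn main() {
    let c = Cell::new(0);
    assert_eq!(bump(&c), 0);
    assert_eq!(c.get(), 1);
}
```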
|
|
move f16/f128 const fn under f16/f128 feature gate
The `*_const` features were added to work around https://github.com/rust-lang/rust/issues/129656, which should not be needed any more.
|
|
eduardosm:stabilize-const_slice_from_raw_parts_mut, r=workingjubilee
Stabilize `const_slice_from_raw_parts_mut`
Stabilizes https://github.com/rust-lang/rust/issues/67456, since https://github.com/rust-lang/rust/issues/57349 has been stabilized.
Stabilized const API:
```rust
// core::ptr
pub const fn slice_from_raw_parts_mut<T>(data: *mut T, len: usize) -> *mut [T];
// core::slice
pub const unsafe fn from_raw_parts_mut<'a, T>(data: *mut T, len: usize) -> &'a mut [T];
// core::ptr::NonNull
pub const fn slice_from_raw_parts(data: NonNull<T>, len: usize) -> Self;
```
Closes https://github.com/rust-lang/rust/issues/67456.
r? libs-api
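A minimal sketch (null and zero-length, so nothing is ever dereferenced):
```rust
// Construct a raw slice pointer during const evaluation.
const EMPTY: *mut [u8] = core::ptr::slice_from_raw_parts_mut(core::ptr::null_mut(), 0);

fn main() {
    assert!(EMPTY.is_null());
}
```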
|
|
r=tgross35
Stabilize 5 `const_mut_refs`-dependent APIs
Since `const_mut_refs` and `const_refs_to_cell` have been stabilized, we may now create mutable references inside our library API. Thus we now stabilize the `const fn` versions of these public library APIs, whose implementations require mutable references:
- const `NonNull::as_mut` https://github.com/rust-lang/rust/issues/91822#issuecomment-2338930442
- const `slice::{first,last}_mut`: https://github.com/rust-lang/rust/issues/83570#issuecomment-2334847112
- const `str::as_{mut_ptr,bytes_mut}`: https://github.com/rust-lang/rust/issues/130086#issuecomment-2336408562
- const `str::from_utf8_unchecked_mut`: https://github.com/rust-lang/rust/issues/91005#issuecomment-2359820672
- const `UnsafeCell::get_mut`: https://github.com/rust-lang/rust/issues/88836#issuecomment-2359817772
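For illustration, one of the newly const-stable methods in use:
```rust
const FIRST: i32 = {
    let mut arr = [1, 2, 3];
    if let Some(first) = arr.first_mut() {
        *first = 10;
    }
    arr[0]
};

fn main() {
    assert_eq!(FIRST, 10);
}
```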
|
|
Stabilize the `map`/`value` methods on `ControlFlow`
And fix the stability attribute on the `pub use` in `core::ops`.
libs-api in https://github.com/rust-lang/rust/issues/75744#issuecomment-2231214910 seemed reasonably happy with naming for these, so let's try for an FCP.
Summary:
```rust
impl<B, C> ControlFlow<B, C> {
pub fn break_value(self) -> Option<B>;
pub fn map_break<T>(self, f: impl FnOnce(B) -> T) -> ControlFlow<T, C>;
pub fn continue_value(self) -> Option<C>;
pub fn map_continue<T>(self, f: impl FnOnce(C) -> T) -> ControlFlow<B, T>;
}
```
Resolves #75744
``@rustbot`` label +needs-fcp +t-libs-api -t-libs
---
Aside, in case it keeps someone else from going down the same dead end: I looked at the `{break,continue}_value` methods and tried to make them `const` as part of this, but that's disallowed because we don't have `const Drop`, so I put them back to not even being unstably const.
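A quick sketch of the stabilized helpers:
```rust
use std::ops::ControlFlow;

fn main() {
    // break_value/continue_value extract the payload, if any.
    let b: ControlFlow<&str, u32> = ControlFlow::Break("stop");
    assert_eq!(b.break_value(), Some("stop"));

    // map_continue transforms only the Continue variant.
    let c: ControlFlow<&str, u32> = ControlFlow::Continue(3);
    assert_eq!(c.map_continue(|v| v * 2), ControlFlow::Continue(6));
}
```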
|
|
Stabilize `const_float_classify`
Tracking issue: https://github.com/rust-lang/rust/issues/72505
Also reverts https://github.com/rust-lang/rust/pull/114486
Closes https://github.com/rust-lang/rust/issues/72505
Stabilized const API:
```rust
impl f32 {
pub const fn is_nan(self) -> bool;
pub const fn is_infinite(self) -> bool;
pub const fn is_finite(self) -> bool;
pub const fn is_subnormal(self) -> bool;
pub const fn is_normal(self) -> bool;
pub const fn classify(self) -> FpCategory;
pub const fn is_sign_positive(self) -> bool;
pub const fn is_sign_negative(self) -> bool;
}
impl f64 {
pub const fn is_nan(self) -> bool;
pub const fn is_infinite(self) -> bool;
pub const fn is_finite(self) -> bool;
pub const fn is_subnormal(self) -> bool;
pub const fn is_normal(self) -> bool;
pub const fn classify(self) -> FpCategory;
pub const fn is_sign_positive(self) -> bool;
pub const fn is_sign_negative(self) -> bool;
}
```
cc `@rust-lang/wg-const-eval` `@rust-lang/libs-api`
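A minimal const-context sketch:
```rust
use core::num::FpCategory;

const CAT: FpCategory = 1.0f32.classify();
const NEG: bool = (-0.0_f64).is_sign_negative();

fn main() {
    assert!(matches!(CAT, FpCategory::Normal));
    assert!(NEG);
}
```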
|
|
Const-stabilizes:
- `slice::first_mut`
- `slice::split_first_mut`
- `slice::last_mut`
- `slice::split_last_mut`
|
|
Const-stabilizes:
- `UnsafeCell::get_mut`
|
|
Const-stabilizes:
- `NonNull::as_mut`
|
|
Const-stabilizes:
- `str::as_bytes_mut`
- `str::as_mut_ptr`
|
|
Const-stabilizes:
- `str::from_utf8_unchecked_mut`
|
|
Add `[Option<T>; N]::transpose`
This PR adds a new unstable libs API, `[Option<T>; N]::transpose`, which permits going from `[Option<T>; N]` to `Option<[T; N]>`.
This new API doesn't have an ACP, as it was directly requested by T-libs-api in https://github.com/rust-lang/rust/issues/97601#issuecomment-2372109119:
> [..] but it'd be trivial to provide a helper method `.transpose()` that turns array-of-Option into Option-of-array (**and we think that method should exist**; it already does for array-of-MaybeUninit).
r? libs
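A nightly sketch of the new method (the feature-gate name below is an assumption):
```rust
#![feature(option_array_transpose)]

fn main() {
    let all: [Option<u8>; 2] = [Some(1), Some(2)];
    assert_eq!(all.transpose(), Some([1, 2]));

    let partial: [Option<u8>; 2] = [Some(1), None];
    assert_eq!(partial.transpose(), None);
}
```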
|
|
Small optimization for integers Display implementation
This is a first pass at speeding up the integer `Display` implementation. The idea is to reduce the stack usage of the buffer storing the output (which shouldn't be visible in benchmarks) and to add a small specialization that greatly benefits smaller integers like `u8` and `i8`.
Here are the results of the benchmarks:
| bench name | current std | with this PR |
|-|-|-|
| bench_std_fmt::bench_i16_0 | 16.45 ns/iter (+/- 0.25) | 16.50 ns/iter (+/- 0.15) |
| bench_std_fmt::bench_i16_max | 17.83 ns/iter (+/- 0.66) | 17.58 ns/iter (+/- 0.10) |
| bench_std_fmt::bench_i16_min | 20.97 ns/iter (+/- 0.49) | 20.50 ns/iter (+/- 0.28) |
| bench_std_fmt::bench_i32_0 | 16.63 ns/iter (+/- 0.06) | 16.62 ns/iter (+/- 0.07) |
| bench_std_fmt::bench_i32_max | 19.79 ns/iter (+/- 0.43) | 19.55 ns/iter (+/- 0.14) |
| bench_std_fmt::bench_i32_min | 22.97 ns/iter (+/- 0.50) | 22.08 ns/iter (+/- 0.08) |
| bench_std_fmt::bench_i64_0 | 16.63 ns/iter (+/- 0.39) | 16.69 ns/iter (+/- 0.44) |
| bench_std_fmt::bench_i64_half | 19.60 ns/iter (+/- 0.05) | 19.10 ns/iter (+/- 0.05) |
| bench_std_fmt::bench_i64_max | 25.22 ns/iter (+/- 0.34) | 24.43 ns/iter (+/- 0.02) |
| bench_std_fmt::bench_i8_0 | 16.27 ns/iter (+/- 0.32) | 15.80 ns/iter (+/- 0.17) |
| bench_std_fmt::bench_i8_max | 16.71 ns/iter (+/- 0.09) | 16.25 ns/iter (+/- 0.01) |
| bench_std_fmt::bench_i8_min | 20.07 ns/iter (+/- 0.22) | 19.80 ns/iter (+/- 0.30) |
| bench_std_fmt::bench_u128_0 | 21.37 ns/iter (+/- 0.24) | 21.35 ns/iter (+/- 0.35) |
| bench_std_fmt::bench_u128_max | 48.13 ns/iter (+/- 0.20) | 48.78 ns/iter (+/- 0.29) |
| bench_std_fmt::bench_u16_0 | 16.48 ns/iter (+/- 0.46) | 16.03 ns/iter (+/- 0.39) |
| bench_std_fmt::bench_u16_max | 17.31 ns/iter (+/- 0.32) | 17.41 ns/iter (+/- 0.32) |
| bench_std_fmt::bench_u16_min | 16.40 ns/iter (+/- 0.45) | 16.02 ns/iter (+/- 0.39) |
| bench_std_fmt::bench_u32_0 | 16.17 ns/iter (+/- 0.04) | 16.29 ns/iter (+/- 0.16) |
| bench_std_fmt::bench_u32_max | 19.00 ns/iter (+/- 0.10) | 19.16 ns/iter (+/- 0.28) |
| bench_std_fmt::bench_u32_min | 16.16 ns/iter (+/- 0.09) | 16.28 ns/iter (+/- 0.11) |
| bench_std_fmt::bench_u64_0 | 16.22 ns/iter (+/- 0.22) | 16.14 ns/iter (+/- 0.18) |
| bench_std_fmt::bench_u64_half | 19.25 ns/iter (+/- 0.07) | 18.95 ns/iter (+/- 0.05) |
| bench_std_fmt::bench_u64_max | 24.31 ns/iter (+/- 0.08) | 24.18 ns/iter (+/- 0.08) |
| bench_std_fmt::bench_u8_0 | 15.76 ns/iter (+/- 0.08) | 15.66 ns/iter (+/- 0.08) |
| bench_std_fmt::bench_u8_max | 16.53 ns/iter (+/- 0.03) | 16.29 ns/iter (+/- 0.02) |
| bench_std_fmt::bench_u8_min | 15.77 ns/iter (+/- 0.06) | 15.67 ns/iter (+/- 0.02) |
The source code is:
<details>
<summary>source code</summary>
```rust
#![feature(test)]
#![allow(non_snake_case)]
#![allow(clippy::cast_lossless)]
extern crate test;
macro_rules! benches {
($($name:ident($value:expr))*) => {
mod bench_std_fmt {
use std::io::Write;
use test::{Bencher, black_box};
$(
#[bench]
fn $name(b: &mut Bencher) {
let mut buf = Vec::with_capacity(40);
b.iter(|| {
buf.clear();
write!(&mut buf, "{}", black_box($value)).unwrap();
black_box(&buf);
});
}
)*
}
}
}
benches! {
bench_u64_0(0u64)
bench_u64_half(u32::max_value() as u64)
bench_u64_max(u64::max_value())
bench_i64_0(0i64)
bench_i64_half(i32::max_value() as i64)
bench_i64_max(i64::max_value())
bench_u16_0(0u16)
bench_u16_min(u16::min_value())
bench_u16_max(u16::max_value())
bench_i16_0(0i16)
bench_i16_min(i16::min_value())
bench_i16_max(i16::max_value())
bench_u128_0(0u128)
bench_u128_max(u128::max_value())
bench_i8_0(0i8)
bench_i8_min(i8::min_value())
bench_i8_max(i8::max_value())
bench_u8_0(0u8)
bench_u8_min(u8::min_value())
bench_u8_max(u8::max_value())
bench_u32_0(0u32)
bench_u32_min(u32::min_value())
bench_u32_max(u32::max_value())
bench_i32_0(0i32)
bench_i32_min(i32::min_value())
bench_i32_max(i32::max_value())
}
```
</details>
I then ran the equivalent code (source below) under callgrind and compared the runs with [callgrind_differ](https://github.com/Ethiraric/callgrind_differ) to get a nice output; here's the result:
```
core::fmt::num::imp::<impl core::fmt::Display for i16>::fmt | 1300000 | - 70000 - 5.385% 1230000
core::fmt::num::imp::<impl core::fmt::Display for i32>::fmt | 1910000 | - 100000 - 5.236% 1810000
core::fmt::num::imp::<impl core::fmt::Display for i64>::fmt | 2430000 | - 110000 - 4.527% 2320000
core::fmt::num::imp::<impl core::fmt::Display for i8>::fmt | 1080000 | - 170000 - 15.741% 910000
core::fmt::num::imp::<impl core::fmt::Display for u16>::fmt | 960000 | + 10000 + 1.042% 970000
core::fmt::num::imp::<impl core::fmt::Display for u32>::fmt | 1300000 | + 30000 + 2.308% 1330000
core::fmt::num::imp::<impl core::fmt::Display for u8>::fmt | 820000 | - 30000 - 3.659% 790000
```
<details>
<summary>Source code</summary>
```rust
#![feature(test)]
extern crate test;
use std::io::{stdout, Write};
use std::io::StdoutLock;
use test::black_box;
macro_rules! benches {
($handle:ident, $buf:ident, $($name:ident($value:expr))*) => {
$(
fn $name(handle: &mut StdoutLock, buf: &mut Vec<u8>) {
for _ in 0..10000 {
buf.clear();
write!(buf, "{}", black_box($value)).unwrap();
handle.write_all(buf).unwrap();
}
}
$name(&mut $handle, &mut $buf);
)*
}
}
fn main() {
let mut handle = stdout().lock();
let mut buf = Vec::with_capacity(40);
benches! {
handle, buf,
bench_u64_0(0u64)
bench_u64_half(u32::max_value() as u64)
bench_u64_max(u64::max_value())
bench_i64_0(0i64)
bench_i64_half(i32::max_value() as i64)
bench_i64_max(i64::max_value())
bench_u16_0(0u16)
bench_u16_min(u16::min_value())
bench_u16_max(u16::max_value())
bench_i16_0(0i16)
bench_i16_min(i16::min_value())
bench_i16_max(i16::max_value())
bench_u128_0(0u128)
bench_u128_max(u128::max_value())
bench_i8_0(0i8)
bench_i8_min(i8::min_value())
bench_i8_max(i8::max_value())
bench_u8_0(0u8)
bench_u8_min(u8::min_value())
bench_u8_max(u8::max_value())
bench_i32_0(0i32)
bench_i32_min(i32::min_value())
bench_i32_max(i32::max_value())
bench_u32_0(0u32)
bench_u32_min(u32::min_value())
bench_u32_max(u32::max_value())
}
}
```
</details>
The next step would be to specialize the `ToString` implementation so it doesn't go through the `Display` trait. I'm not sure if it will improve anything but I think it's worth a try.
r? `@Amanieu`