Optimize `escape_ascii` using a lookup table
Based upon my suggestion here: https://github.com/rust-lang/rust/pull/125340#issuecomment-2130441817
Effectively, we can take advantage of the fact that ASCII only needs 7 bits, using the eighth bit to store whether the value should be escaped or not. This adds a 256-byte lookup table, but 256 bytes *should* be small enough that very few people will mind, in my admittedly contestable opinion.
The generated assembly isn't clearly better (although it has fewer branches), so I benchmarked on three inputs: a random 200 KiB buffer, `/bin/cat`, and this repo's `Cargo.toml`. In all cases, the generated code ran faster on my machine (an old i7-8700).
If you want to try the benchmarking code yourself:
<details><summary>Criterion code below. Replace <code>/home/ltdk/rustsrc</code> with the appropriate directory.</summary>
```rust
#![feature(ascii_char)]
#![feature(ascii_char_variants)]
#![feature(const_option)]
#![feature(let_chains)]
use core::ascii;
use core::ops::Range;
use criterion::{criterion_group, criterion_main, Criterion};
use rand::{thread_rng, Rng};
const HEX_DIGITS: [ascii::Char; 16] = *b"0123456789abcdef".as_ascii().unwrap();
#[inline]
const fn backslash<const N: usize>(a: ascii::Char) -> ([ascii::Char; N], Range<u8>) {
const { assert!(N >= 2) };
let mut output = [ascii::Char::Null; N];
output[0] = ascii::Char::ReverseSolidus;
output[1] = a;
(output, 0..2)
}
#[inline]
const fn hex_escape<const N: usize>(byte: u8) -> ([ascii::Char; N], Range<u8>) {
const { assert!(N >= 4) };
let mut output = [ascii::Char::Null; N];
let hi = HEX_DIGITS[(byte >> 4) as usize];
let lo = HEX_DIGITS[(byte & 0xf) as usize];
output[0] = ascii::Char::ReverseSolidus;
output[1] = ascii::Char::SmallX;
output[2] = hi;
output[3] = lo;
(output, 0..4)
}
#[inline]
const fn verbatim<const N: usize>(a: ascii::Char) -> ([ascii::Char; N], Range<u8>) {
const { assert!(N >= 1) };
let mut output = [ascii::Char::Null; N];
output[0] = a;
(output, 0..1)
}
/// Escapes an ASCII character.
///
/// Returns a buffer and the length of the escaped representation.
const fn escape_ascii_old<const N: usize>(byte: u8) -> ([ascii::Char; N], Range<u8>) {
const { assert!(N >= 4) };
match byte {
b'\t' => backslash(ascii::Char::SmallT),
b'\r' => backslash(ascii::Char::SmallR),
b'\n' => backslash(ascii::Char::SmallN),
b'\\' => backslash(ascii::Char::ReverseSolidus),
b'\'' => backslash(ascii::Char::Apostrophe),
b'\"' => backslash(ascii::Char::QuotationMark),
0x00..=0x1F => hex_escape(byte),
_ => match ascii::Char::from_u8(byte) {
Some(a) => verbatim(a),
None => hex_escape(byte),
},
}
}
/// Escapes an ASCII character.
///
/// Returns a buffer and the length of the escaped representation.
const fn escape_ascii_new<const N: usize>(byte: u8) -> ([ascii::Char; N], Range<u8>) {
/// Lookup table that determines how to display each character.
///
/// Since ASCII characters will always be 7 bits, we can exploit this to store the 8th bit to
/// indicate whether the result is escaped or unescaped.
///
/// We additionally use 0x80 (escaped NUL character) to indicate hex-escaped bytes, since
/// escaped NUL will not occur.
const LOOKUP: [u8; 256] = {
let mut arr = [0; 256];
let mut idx = 0;
loop {
arr[idx as usize] = match idx {
// use 8th bit to indicate escaped
b'\t' => 0x80 | b't',
b'\r' => 0x80 | b'r',
b'\n' => 0x80 | b'n',
b'\\' => 0x80 | b'\\',
b'\'' => 0x80 | b'\'',
b'"' => 0x80 | b'"',
// use NUL to indicate hex-escaped
0x00..=0x1F | 0x7F..=0xFF => 0x80 | b'\0',
_ => idx,
};
if idx == 255 {
break;
}
idx += 1;
}
arr
};
let lookup = LOOKUP[byte as usize];
// 8th bit indicates escape
let lookup_escaped = lookup & 0x80 != 0;
// SAFETY: We explicitly mask out the eighth bit to get a 7-bit ASCII character.
let lookup_ascii = unsafe { ascii::Char::from_u8_unchecked(lookup & 0x7F) };
if lookup_escaped {
// NUL indicates hex-escaped
if matches!(lookup_ascii, ascii::Char::Null) {
hex_escape(byte)
} else {
backslash(lookup_ascii)
}
} else {
verbatim(lookup_ascii)
}
}
fn escape_bytes(bytes: &[u8], f: impl Fn(u8) -> ([ascii::Char; 4], Range<u8>)) -> Vec<ascii::Char> {
let mut vec = Vec::new();
for b in bytes {
let (buf, range) = f(*b);
vec.extend_from_slice(&buf[range.start as usize..range.end as usize]);
}
vec
}
pub fn criterion_benchmark(c: &mut Criterion) {
let mut group = c.benchmark_group("escape_ascii");
group.sample_size(1000);
let rand_200k = &mut [0; 200 * 1024];
thread_rng().fill(&mut rand_200k[..]);
let cat = include_bytes!("/bin/cat");
let cargo_toml = include_bytes!("/home/ltdk/rustsrc/Cargo.toml");
group.bench_function("old_rand", |b| {
b.iter(|| escape_bytes(rand_200k, escape_ascii_old));
});
group.bench_function("new_rand", |b| {
b.iter(|| escape_bytes(rand_200k, escape_ascii_new));
});
group.bench_function("old_bin", |b| {
b.iter(|| escape_bytes(cat, escape_ascii_old));
});
group.bench_function("new_bin", |b| {
b.iter(|| escape_bytes(cat, escape_ascii_new));
});
group.bench_function("old_cargo_toml", |b| {
b.iter(|| escape_bytes(cargo_toml, escape_ascii_old));
});
group.bench_function("new_cargo_toml", |b| {
b.iter(|| escape_bytes(cargo_toml, escape_ascii_new));
});
group.finish();
}
criterion_group!(benches, criterion_benchmark);
criterion_main!(benches);
```
</details>
My benchmark results:
```
escape_ascii/old_rand time: [1.6965 ms 1.7006 ms 1.7053 ms]
Found 22 outliers among 1000 measurements (2.20%)
4 (0.40%) high mild
18 (1.80%) high severe
escape_ascii/new_rand time: [1.6749 ms 1.6953 ms 1.7158 ms]
Found 38 outliers among 1000 measurements (3.80%)
38 (3.80%) high mild
escape_ascii/old_bin time: [224.59 µs 225.40 µs 226.33 µs]
Found 39 outliers among 1000 measurements (3.90%)
17 (1.70%) high mild
22 (2.20%) high severe
escape_ascii/new_bin time: [164.86 µs 165.63 µs 166.58 µs]
Found 107 outliers among 1000 measurements (10.70%)
43 (4.30%) high mild
64 (6.40%) high severe
escape_ascii/old_cargo_toml
time: [23.397 µs 23.699 µs 24.014 µs]
Found 204 outliers among 1000 measurements (20.40%)
21 (2.10%) high mild
183 (18.30%) high severe
escape_ascii/new_cargo_toml
time: [16.404 µs 16.438 µs 16.483 µs]
Found 88 outliers among 1000 measurements (8.80%)
56 (5.60%) high mild
32 (3.20%) high severe
```
Random: 1.7006ms => 1.6953ms (<1% speedup)
Binary: 225.40µs => 165.63µs (26% speedup)
Text: 23.699µs => 16.438µs (30% speedup)
|
|
This makes the following API stable in const contexts:
```rust
impl<T> Option<T> {
    pub const fn as_mut(&mut self) -> Option<&mut T>;
    pub const fn expect(self, msg: &str) -> T;
    pub const fn unwrap(self) -> T;
    pub const unsafe fn unwrap_unchecked(self) -> T;
    pub const fn take(&mut self) -> Option<T>;
    pub const fn replace(&mut self, value: T) -> Option<T>;
}

impl<T> Option<&T> {
    pub const fn copied(self) -> Option<T>
    where
        T: Copy;
}

impl<T> Option<&mut T> {
    pub const fn copied(self) -> Option<T>
    where
        T: Copy;
}

impl<T, E> Option<Result<T, E>> {
    pub const fn transpose(self) -> Result<Option<T>, E>;
}

impl<T> Option<Option<T>> {
    pub const fn flatten(self) -> Option<T>;
}
```
The following functions make use of the unstable
`const_precise_live_drops` feature:
- `expect`
- `unwrap`
- `unwrap_unchecked`
- `transpose`
- `flatten`
Fixes: <https://github.com/rust-lang/rust/issues/67441>
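For illustration, a minimal sketch of what this stabilization enables in const context (values here are arbitrary):
```rust
// All of these method calls were rejected in const items before this change.
const UNWRAPPED: i32 = Some(5).unwrap();
const FLATTENED: Option<u8> = Some(Some(7u8)).flatten();
const TRANSPOSED: Result<Option<u8>, ()> = Some(Ok(7u8)).transpose();

fn main() {
    assert_eq!(UNWRAPPED, 5);
    assert_eq!(FLATTENED, Some(7));
    assert_eq!(TRANSPOSED, Ok(Some(7)));
}
```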
|
|
Stabilize const `ptr::write*` and `mem::replace`
Since `const_mut_refs` and `const_refs_to_cell` have been stabilized, we may now also stabilize the ability to write to places during const evaluation inside our library API. This PR therefore proposes the `const fn` versions of `ptr::write` and its variants, which in turn allows us to stabilize `mem::replace` and `ptr::replace`.
- const `mem::replace`: https://github.com/rust-lang/rust/issues/83164#issuecomment-2338660862
- const `ptr::write{,_bytes,_unaligned}`: https://github.com/rust-lang/rust/issues/86302#issuecomment-2330275266
Their implementation requires an additional internal stabilization of `const_intrinsic_forget`, on which `*::write*` and thus `*::replace` depend. We therefore const-stabilize the internal intrinsics `forget`, `write_bytes`, and `write_via_move`.
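As a hedged sketch (illustrative values, assuming a toolchain where these stabilizations have landed), this is the kind of code that becomes legal:
```rust
// Mutate a place during const evaluation via `mem::replace` and `ptr::write`.
const SWAPPED: (i32, i32) = {
    let mut slot = 1;
    let old = core::mem::replace(&mut slot, 2);
    // SAFETY: `&mut slot` coerces to a valid, aligned `*mut i32`.
    unsafe { core::ptr::write(&mut slot, 3) };
    (old, slot)
};

fn main() {
    assert_eq!(SWAPPED, (1, 3));
}
```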
|
|
Depends on stabilizing `const_ptr_write`.
Const-stabilizes:
- `core::mem::replace`
- `core::ptr::replace`
|
|
Const-stabilizes:
- `write`
- `write_bytes`
- `write_unaligned`
In the following paths:
- `core::ptr`
- `core::ptr::NonNull`
- pointer `<*mut T>`
Const-stabilizes the internal `core::intrinsics`:
- `write_bytes`
- `write_via_move`
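A minimal sketch of what const `write_bytes` permits (illustrative only, and nothing invalid is ever dereferenced):
```rust
// Zero a buffer during const evaluation.
const ZEROED: [u8; 4] = {
    let mut buf = [0xFF_u8; 4];
    let p = &mut buf as *mut [u8; 4] as *mut u8;
    // SAFETY: `p` points to exactly 4 valid, writable bytes.
    unsafe { core::ptr::write_bytes(p, 0, 4) };
    buf
};

fn main() {
    assert_eq!(ZEROED, [0; 4]);
}
```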
|
|
This is an implicit requirement of stabilizing `const_ptr_write`.
Const-stabilizes the internal `core::intrinsics`:
- `forget`
|
|
stabilize duration_consts_float
Waiting for FCP in https://github.com/rust-lang/rust/issues/72440 to pass.
`as_millis_f32` and `as_millis_f64` are not stable at all yet, so I moved their const-stability together with their regular stability (tracked at https://github.com/rust-lang/rust/issues/122451).
Fixes https://github.com/rust-lang/rust/issues/72440
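For illustration, assuming `as_secs_f64` is among the const-stabilized methods:
```rust
use core::time::Duration;

// Float-returning Duration accessors become usable in const items.
const FRAME: Duration = Duration::from_millis(16);
const FRAME_SECS: f64 = FRAME.as_secs_f64();

fn main() {
    assert!((FRAME_SECS - 0.016).abs() < 1e-12);
}
```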
|
|
intrinsics fmuladdf{32,64}: expose llvm.fmuladd.* semantics
Add intrinsics `fmuladd{f32,f64}`. This computes `(a * b) + c`, to be fused if the code generator determines that (i) the target instruction set has support for a fused operation, and (ii) that the fused operation is more efficient than the equivalent, separate pair of `mul` and `add` instructions.
https://llvm.org/docs/LangRef.html#llvm-fmuladd-intrinsic
The codegen_cranelift backend uses the `fma` function from libc, which is a correct implementation but lacks the desired performance semantics. I think this requires an update to Cranelift to expose a suitable instruction in its IR.
I have not tested with codegen_gcc, but it should behave the same way (using `fma` from libc).
---
This topic has been discussed a few times on Zulip and was suggested, for example, by `@workingjubilee` in [Effect of fma disabled](https://rust-lang.zulipchat.com/#narrow/stream/122651-general/topic/Effect.20of.20fma.20disabled/near/274179331).
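To illustrate the semantic difference (this is a contrast sketch, not the intrinsic itself):
```rust
fn fused(a: f64, b: f64, c: f64) -> f64 {
    // `mul_add` guarantees a single rounding step (true fma semantics),
    // which can be slow on targets without hardware FMA.
    a.mul_add(b, c)
}

fn unfused(a: f64, b: f64, c: f64) -> f64 {
    // Two rounding steps; the backend may or may not fuse this.
    a * b + c
}

fn main() {
    println!("{} {}", fused(0.1, 0.2, 0.3), unfused(0.1, 0.2, 0.3));
}
```
The new intrinsics sit between these two: they compute `(a * b) + c` and leave the fuse-or-not decision to the code generator.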
|
|
Stabilise `const_char_encode_utf8`.
Closes: #130512
This PR stabilises the `const_char_encode_utf8` feature gate (i.e. support for `char::encode_utf8` in const scenarios).
Note that the linked tracking issue is currently awaiting FCP.
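A minimal sketch of the now-stable capability:
```rust
// `char::encode_utf8` can now run during const evaluation.
const fn utf8_len(c: char) -> usize {
    let mut buf = [0u8; 4];
    c.encode_utf8(&mut buf).len()
}

fn main() {
    const TWO: usize = utf8_len('é');
    assert_eq!(TWO, 2);
}
```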
|
|
stabilize const_result
Waiting for FCP to complete in https://github.com/rust-lang/rust/issues/82814
Fixes #82814
|
|
Stabilize `debug_more_non_exhaustive`
Fixes: https://github.com/rust-lang/rust/issues/127942
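For illustration, a hedged sketch assuming the feature adds `finish_non_exhaustive` to the list/set/map/tuple debug builders (it previously existed only on `DebugStruct`):
```rust
use std::fmt;

struct Preview(Vec<u32>);

impl fmt::Debug for Preview {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Prints e.g. `[1, 2, 3, ..]` for longer vectors.
        f.debug_list().entries(self.0.iter().take(3)).finish_non_exhaustive()
    }
}

fn main() {
    println!("{:?}", Preview(vec![1, 2, 3, 4, 5]));
}
```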
|
|
Add intrinsics `fmuladd{f16,f32,f64,f128}`. This computes `(a * b) +
c`, to be fused if the code generator determines that (i) the target
instruction set has support for a fused operation, and (ii) that the
fused operation is more efficient than the equivalent, separate pair
of `mul` and `add` instructions.
https://llvm.org/docs/LangRef.html#llvm-fmuladd-intrinsic
Miri support is included for f32 and f64.
The codegen_cranelift backend uses the `fma` function from libc, which
is a correct implementation but lacks the desired performance
semantics. I think this requires an update to Cranelift to expose a
suitable instruction in its IR.
I have not tested with codegen_gcc, but it should behave the same
way (using `fma` from libc).
|
|
Fixing rustdoc for LayoutError.
I started reading the std lib from start to finish and noticed that this rustdoc comment wasn't correct.
|
|
Stabilize const `{slice,array}::from_mut`
This PR stabilizes the following APIs as const stable as of Rust 1.83:
```rust
// core::array
pub const fn from_mut<T>(s: &mut T) -> &mut [T; 1];
// core::slice
pub const fn from_mut<T>(s: &mut T) -> &mut [T];
```
This is made possible by `const_mut_refs` being stabilized (yay).
Tracking issue: #90206
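A minimal sketch of the const usage this unlocks:
```rust
// A mutable one-element view of a single value, in const context.
const LEN: usize = {
    let mut x = 42u8;
    core::slice::from_mut(&mut x).len()
};

fn main() {
    assert_eq!(LEN, 1);
}
```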
|
|
AngelicosPhosphoros:better_doc_for_slice_slicing_at_ends, r=cuviper
Add docs about slicing slices at the ends
Closes https://github.com/rust-lang/rust/issues/60783
|
|
Library: Rename "object safe" to "dyn compatible"
Completed T-lang FCP: https://github.com/rust-lang/lang-team/issues/286#issuecomment-2338905118.
Tracking issue: https://github.com/rust-lang/rust/issues/130852
Regarding https://github.com/rust-lang/rust/labels/relnotes, I guess I will manually open a https://github.com/rust-lang/rust/labels/relnotes-tracking-issue since this change affects everything (compiler, library, tools, docs, books, everyday language).
r? ghost
|
|
Closes https://github.com/rust-lang/rust/issues/60783
|
|
...because the checks in offset found bugs in a crater run.
|
|
Fix typo in primitive_docs.rs
typo introduced in #129559
|
|
Add a Lint for Pointer to Integer Transmutes in Consts
Fixes #87525
This PR adds a MirLint for pointer to integer transmutes in const functions and associated consts. The implementation closely follows this comment: https://github.com/rust-lang/rust/pull/85769#issuecomment-880969112. More details about the implementation can be found in the comments.
Note: This could break some sound code as mentioned by RalfJung in https://github.com/rust-lang/rust/pull/85769#issuecomment-886491680:
> ... technically const-code could transmute/cast an int to a ptr and then transmute it back and that would be correct -- so the lint will deny some sound code. Does not seem terribly likely though.
References:
1. https://doc.rust-lang.org/std/mem/fn.transmute.html
2. https://doc.rust-lang.org/reference/items/associated-items.html#associated-constants
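For illustration, the shape of code the lint targets (the exact diagnostic wording is an assumption):
```rust
const fn leak_addr(p: *const u8) -> usize {
    // The lint fires here: a pointer-to-integer transmute yields a
    // value that const evaluation cannot meaningfully use.
    unsafe { std::mem::transmute(p) }
}

fn main() {
    let x = 0u8;
    println!("{}", leak_addr(&x)); // fine at runtime, linted in const fn
}
```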
|
|
make Cell unstably const
Now that we can do interior mutability in `const`, most of the Cell API can be `const fn`. :) The main exception is `set`, because it drops the old value. So from const context one has to use `replace`, which delegates the responsibility for dropping to the caller.
Tracking issue: https://github.com/rust-lang/rust/issues/131283
`as_array_of_cells` is itself still unstable, so I added the const-ness to the feature gate for that function and not to `const_cell`. Cc #88248.
r? libs-api
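A nightly-only sketch of the resulting API surface (gate name per the tracking issue):
```rust
#![feature(const_cell)]
use core::cell::Cell;

const fn bump(counter: &Cell<u32>) -> u32 {
    // `set` is not const because it drops the old value; `replace`
    // hands the old value back to the caller instead.
    counter.replace(counter.get() + 1)
}

fn main() {
    let c = Cell::new(0);
    assert_eq!(bump(&c), 0);
    assert_eq!(c.get(), 1);
}
```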
|
|
move f16/f128 const fn under f16/f128 feature gate
The `*_const` features were added to work around https://github.com/rust-lang/rust/issues/129656, which should not be needed any more.
|
|
eduardosm:stabilize-const_slice_from_raw_parts_mut, r=workingjubilee
Stabilize `const_slice_from_raw_parts_mut`
Stabilizes https://github.com/rust-lang/rust/issues/67456, since https://github.com/rust-lang/rust/issues/57349 has been stabilized.
Stabilized const API:
```rust
// core::ptr
pub const fn slice_from_raw_parts_mut<T>(data: *mut T, len: usize) -> *mut [T];
// core::slice
pub const unsafe fn from_raw_parts_mut<'a, T>(data: *mut T, len: usize) -> &'a mut [T];
// core::ptr::NonNull
pub const fn slice_from_raw_parts(data: NonNull<T>, len: usize) -> Self;
```
Closes https://github.com/rust-lang/rust/issues/67456.
r? libs-api
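A minimal sketch (null and zero-length, so nothing is ever dereferenced):
```rust
// Construct a raw slice pointer during const evaluation.
const EMPTY: *mut [u8] = core::ptr::slice_from_raw_parts_mut(core::ptr::null_mut(), 0);

fn main() {
    assert!(EMPTY.is_null());
}
```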
|
|
r=tgross35
Stabilize 5 `const_mut_refs`-dependent APIs
Since `const_mut_refs` and `const_refs_to_cell` have been stabilized, we may now create mutable references inside our library API. Thus we now stabilize the `const fn` versions of these public library APIs, whose implementations require mutable references:
- const `NonNull::as_mut` https://github.com/rust-lang/rust/issues/91822#issuecomment-2338930442
- const `slice::{first,last}_mut`: https://github.com/rust-lang/rust/issues/83570#issuecomment-2334847112
- const `str::as_{mut_ptr,bytes_mut}`: https://github.com/rust-lang/rust/issues/130086#issuecomment-2336408562
- const `str::from_utf8_unchecked_mut`: https://github.com/rust-lang/rust/issues/91005#issuecomment-2359820672
- const `UnsafeCell::get_mut`: https://github.com/rust-lang/rust/issues/88836#issuecomment-2359817772
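For illustration, one of the newly const-stable methods in use:
```rust
const FIRST: i32 = {
    let mut arr = [1, 2, 3];
    if let Some(first) = arr.first_mut() {
        *first = 10;
    }
    arr[0]
};

fn main() {
    assert_eq!(FIRST, 10);
}
```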
|
|
Stabilize the `map`/`value` methods on `ControlFlow`
And fix the stability attribute on the `pub use` in `core::ops`.
libs-api in https://github.com/rust-lang/rust/issues/75744#issuecomment-2231214910 seemed reasonably happy with naming for these, so let's try for an FCP.
Summary:
```rust
impl<B, C> ControlFlow<B, C> {
pub fn break_value(self) -> Option<B>;
pub fn map_break<T>(self, f: impl FnOnce(B) -> T) -> ControlFlow<T, C>;
pub fn continue_value(self) -> Option<C>;
pub fn map_continue<T>(self, f: impl FnOnce(C) -> T) -> ControlFlow<B, T>;
}
```
Resolves #75744
``@rustbot`` label +needs-fcp +t-libs-api -t-libs
---
Aside, in case it keeps someone else from going down the same dead end: I looked at the `{break,continue}_value` methods and tried to make them `const` as part of this, but that's disallowed because we don't have `const Drop`, so I put them back to not even being unstably const.
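A quick sketch of the stabilized helpers:
```rust
use std::ops::ControlFlow;

fn main() {
    // break_value/continue_value extract the payload, if any.
    let b: ControlFlow<&str, u32> = ControlFlow::Break("stop");
    assert_eq!(b.break_value(), Some("stop"));

    // map_continue transforms only the Continue variant.
    let c: ControlFlow<&str, u32> = ControlFlow::Continue(3);
    assert_eq!(c.map_continue(|v| v * 2), ControlFlow::Continue(6));
}
```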
|
|
Stabilize `const_float_classify`
Tracking issue: https://github.com/rust-lang/rust/issues/72505
Also reverts https://github.com/rust-lang/rust/pull/114486
Closes https://github.com/rust-lang/rust/issues/72505
Stabilized const API:
```rust
impl f32 {
pub const fn is_nan(self) -> bool;
pub const fn is_infinite(self) -> bool;
pub const fn is_finite(self) -> bool;
pub const fn is_subnormal(self) -> bool;
pub const fn is_normal(self) -> bool;
pub const fn classify(self) -> FpCategory;
pub const fn is_sign_positive(self) -> bool;
pub const fn is_sign_negative(self) -> bool;
}
impl f64 {
pub const fn is_nan(self) -> bool;
pub const fn is_infinite(self) -> bool;
pub const fn is_finite(self) -> bool;
pub const fn is_subnormal(self) -> bool;
pub const fn is_normal(self) -> bool;
pub const fn classify(self) -> FpCategory;
pub const fn is_sign_positive(self) -> bool;
pub const fn is_sign_negative(self) -> bool;
}
```
cc `@rust-lang/wg-const-eval` `@rust-lang/libs-api`
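A minimal const-context sketch:
```rust
use core::num::FpCategory;

const CAT: FpCategory = 1.0f32.classify();
const NEG: bool = (-0.0_f64).is_sign_negative();

fn main() {
    assert!(matches!(CAT, FpCategory::Normal));
    assert!(NEG);
}
```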
|
|
Const-stabilizes:
- `slice::first_mut`
- `slice::split_first_mut`
- `slice::last_mut`
- `slice::split_last_mut`
|
|
Const-stabilizes:
- `UnsafeCell::get_mut`
|
|
Const-stabilizes:
- `NonNull::as_mut`
|
|
Const-stabilizes:
- `str::as_bytes_mut`
- `str::as_mut_ptr`
|
|
Const-stabilizes:
- `str::from_utf8_unchecked_mut`
|
|
Add `[Option<T>; N]::transpose`
This PR adds a new unstable libs API, `[Option<T>; N]::transpose`, which permits going from `[Option<T>; N]` to `Option<[T; N]>`.
This new API doesn't have an ACP, as it was directly requested by T-libs-api in https://github.com/rust-lang/rust/issues/97601#issuecomment-2372109119:
> [..] but it'd be trivial to provide a helper method `.transpose()` that turns array-of-Option into Option-of-array (**and we think that method should exist**; it already does for array-of-MaybeUninit).
r? libs
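A nightly sketch of the new method (the feature-gate name below is an assumption):
```rust
#![feature(option_array_transpose)]

fn main() {
    let all: [Option<u8>; 2] = [Some(1), Some(2)];
    assert_eq!(all.transpose(), Some([1, 2]));

    let partial: [Option<u8>; 2] = [Some(1), None];
    assert_eq!(partial.transpose(), None);
}
```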
|
|
Small optimization for integers Display implementation
This is a first pass at speeding up the integer `Display` implementation. The idea is to reduce the stack usage of the buffer storing the output (which shouldn't be visible in benchmarks) and to add a small specialization that greatly benefits smaller integers like `u8` and `i8`.
Here are the results of the benchmarks:
| bench name | current std | with this PR |
|-|-|-|
| bench_std_fmt::bench_i16_0 | 16.45 ns/iter (+/- 0.25) | 16.50 ns/iter (+/- 0.15) |
| bench_std_fmt::bench_i16_max | 17.83 ns/iter (+/- 0.66) | 17.58 ns/iter (+/- 0.10) |
| bench_std_fmt::bench_i16_min | 20.97 ns/iter (+/- 0.49) | 20.50 ns/iter (+/- 0.28) |
| bench_std_fmt::bench_i32_0 | 16.63 ns/iter (+/- 0.06) | 16.62 ns/iter (+/- 0.07) |
| bench_std_fmt::bench_i32_max | 19.79 ns/iter (+/- 0.43) | 19.55 ns/iter (+/- 0.14) |
| bench_std_fmt::bench_i32_min | 22.97 ns/iter (+/- 0.50) | 22.08 ns/iter (+/- 0.08) |
| bench_std_fmt::bench_i64_0 | 16.63 ns/iter (+/- 0.39) | 16.69 ns/iter (+/- 0.44) |
| bench_std_fmt::bench_i64_half | 19.60 ns/iter (+/- 0.05) | 19.10 ns/iter (+/- 0.05) |
| bench_std_fmt::bench_i64_max | 25.22 ns/iter (+/- 0.34) | 24.43 ns/iter (+/- 0.02) |
| bench_std_fmt::bench_i8_0 | 16.27 ns/iter (+/- 0.32) | 15.80 ns/iter (+/- 0.17) |
| bench_std_fmt::bench_i8_max | 16.71 ns/iter (+/- 0.09) | 16.25 ns/iter (+/- 0.01) |
| bench_std_fmt::bench_i8_min | 20.07 ns/iter (+/- 0.22) | 19.80 ns/iter (+/- 0.30) |
| bench_std_fmt::bench_u128_0 | 21.37 ns/iter (+/- 0.24) | 21.35 ns/iter (+/- 0.35) |
| bench_std_fmt::bench_u128_max | 48.13 ns/iter (+/- 0.20) | 48.78 ns/iter (+/- 0.29) |
| bench_std_fmt::bench_u16_0 | 16.48 ns/iter (+/- 0.46) | 16.03 ns/iter (+/- 0.39) |
| bench_std_fmt::bench_u16_max | 17.31 ns/iter (+/- 0.32) | 17.41 ns/iter (+/- 0.32) |
| bench_std_fmt::bench_u16_min | 16.40 ns/iter (+/- 0.45) | 16.02 ns/iter (+/- 0.39) |
| bench_std_fmt::bench_u32_0 | 16.17 ns/iter (+/- 0.04) | 16.29 ns/iter (+/- 0.16) |
| bench_std_fmt::bench_u32_max | 19.00 ns/iter (+/- 0.10) | 19.16 ns/iter (+/- 0.28) |
| bench_std_fmt::bench_u32_min | 16.16 ns/iter (+/- 0.09) | 16.28 ns/iter (+/- 0.11) |
| bench_std_fmt::bench_u64_0 | 16.22 ns/iter (+/- 0.22) | 16.14 ns/iter (+/- 0.18) |
| bench_std_fmt::bench_u64_half | 19.25 ns/iter (+/- 0.07) | 18.95 ns/iter (+/- 0.05) |
| bench_std_fmt::bench_u64_max | 24.31 ns/iter (+/- 0.08) | 24.18 ns/iter (+/- 0.08) |
| bench_std_fmt::bench_u8_0 | 15.76 ns/iter (+/- 0.08) | 15.66 ns/iter (+/- 0.08) |
| bench_std_fmt::bench_u8_max | 16.53 ns/iter (+/- 0.03) | 16.29 ns/iter (+/- 0.02) |
| bench_std_fmt::bench_u8_min | 15.77 ns/iter (+/- 0.06) | 15.67 ns/iter (+/- 0.02) |
The source code is:
<details>
<summary>source code</summary>
```rust
#![feature(test)]
#![allow(non_snake_case)]
#![allow(clippy::cast_lossless)]
extern crate test;
macro_rules! benches {
($($name:ident($value:expr))*) => {
mod bench_std_fmt {
use std::io::Write;
use test::{Bencher, black_box};
$(
#[bench]
fn $name(b: &mut Bencher) {
let mut buf = Vec::with_capacity(40);
b.iter(|| {
buf.clear();
write!(&mut buf, "{}", black_box($value)).unwrap();
black_box(&buf);
});
}
)*
}
}
}
benches! {
bench_u64_0(0u64)
bench_u64_half(u32::max_value() as u64)
bench_u64_max(u64::max_value())
bench_i64_0(0i64)
bench_i64_half(i32::max_value() as i64)
bench_i64_max(i64::max_value())
bench_u16_0(0u16)
bench_u16_min(u16::min_value())
bench_u16_max(u16::max_value())
bench_i16_0(0i16)
bench_i16_min(i16::min_value())
bench_i16_max(i16::max_value())
bench_u128_0(0u128)
bench_u128_max(u128::max_value())
bench_i8_0(0i8)
bench_i8_min(i8::min_value())
bench_i8_max(i8::max_value())
bench_u8_0(0u8)
bench_u8_min(u8::min_value())
bench_u8_max(u8::max_value())
bench_u32_0(0u32)
bench_u32_min(u32::min_value())
bench_u32_max(u32::max_value())
bench_i32_0(0i32)
bench_i32_min(i32::min_value())
bench_i32_max(i32::max_value())
}
```
</details>
I then ran the equivalent code (source below) under callgrind and compared the runs with [callgrind_differ](https://github.com/Ethiraric/callgrind_differ) to get a nice output; here's the result:
```
core::fmt::num::imp::<impl core::fmt::Display for i16>::fmt | 1300000 | - 70000 - 5.385% 1230000
core::fmt::num::imp::<impl core::fmt::Display for i32>::fmt | 1910000 | - 100000 - 5.236% 1810000
core::fmt::num::imp::<impl core::fmt::Display for i64>::fmt | 2430000 | - 110000 - 4.527% 2320000
core::fmt::num::imp::<impl core::fmt::Display for i8>::fmt | 1080000 | - 170000 - 15.741% 910000
core::fmt::num::imp::<impl core::fmt::Display for u16>::fmt | 960000 | + 10000 + 1.042% 970000
core::fmt::num::imp::<impl core::fmt::Display for u32>::fmt | 1300000 | + 30000 + 2.308% 1330000
core::fmt::num::imp::<impl core::fmt::Display for u8>::fmt | 820000 | - 30000 - 3.659% 790000
```
<details>
<summary>Source code</summary>
```rust
#![feature(test)]
extern crate test;
use std::io::{stdout, Write};
use std::io::StdoutLock;
use test::black_box;
macro_rules! benches {
($handle:ident, $buf:ident, $($name:ident($value:expr))*) => {
$(
fn $name(handle: &mut StdoutLock, buf: &mut Vec<u8>) {
for _ in 0..10000 {
buf.clear();
write!(buf, "{}", black_box($value)).unwrap();
handle.write_all(buf).unwrap();
}
}
$name(&mut $handle, &mut $buf);
)*
}
}
fn main() {
let mut handle = stdout().lock();
let mut buf = Vec::with_capacity(40);
benches! {
handle, buf,
bench_u64_0(0u64)
bench_u64_half(u32::max_value() as u64)
bench_u64_max(u64::max_value())
bench_i64_0(0i64)
bench_i64_half(i32::max_value() as i64)
bench_i64_max(i64::max_value())
bench_u16_0(0u16)
bench_u16_min(u16::min_value())
bench_u16_max(u16::max_value())
bench_i16_0(0i16)
bench_i16_min(i16::min_value())
bench_i16_max(i16::max_value())
bench_u128_0(0u128)
bench_u128_max(u128::max_value())
bench_i8_0(0i8)
bench_i8_min(i8::min_value())
bench_i8_max(i8::max_value())
bench_u8_0(0u8)
bench_u8_min(u8::min_value())
bench_u8_max(u8::max_value())
bench_i32_0(0i32)
bench_i32_min(i32::min_value())
bench_i32_max(i32::max_value())
bench_u32_0(0u32)
bench_u32_min(u32::min_value())
bench_u32_max(u32::max_value())
}
}
```
</details>
The next step would be to specialize the `ToString` implementation so it doesn't go through the `Display` trait. I'm not sure if it will improve anything but I think it's worth a try.
r? `@Amanieu`