about summary refs log tree commit diff
path: root/tests/ui/patchable-function-entry/patchable-function-entry-flags.rs
diff options
context:
space:
mode:
authorAndy Sadler <andrewsadler122@gmail.com>2023-10-05 19:23:46 -0500
committerAndy Sadler <andrewsadler122@gmail.com>2023-10-17 16:20:55 -0500
commit64abf5862ffb5b32f1555642550eb18f383fdc3a (patch)
treeefb3a8014c55047f076043046cd401550b6ec3af /tests/ui/patchable-function-entry/patchable-function-entry-flags.rs
parent4dce75f2e7bc7e75e7d4d4a77ccd84819411d162 (diff)
downloadrust-64abf5862ffb5b32f1555642550eb18f383fdc3a.tar.gz
rust-64abf5862ffb5b32f1555642550eb18f383fdc3a.zip
optimize popcount implementation
Currently, the gcc backend of rustc emits the following for a function that
implements popcount for a u32 (x86_64 targeting AVX2, using the standard Unix
(System V AMD64) calling convention):

    popcount:
        mov     eax, edi
        and     edi, 1431655765
        shr     eax
        and     eax, 1431655765
        add     edi, eax
        mov     edx, edi
        and     edi, 858993459
        shr     edx, 2
        and     edx, 858993459
        add     edx, edi
        mov     eax, edx
        and     edx, 252645135
        shr     eax, 4
        and     eax, 252645135
        add     eax, edx
        mov     edx, eax
        and     eax, 16711935
        shr     edx, 8
        and     edx, 16711935
        add     edx, eax
        movzx   eax, dx
        shr     edx, 16
        add     eax, edx
        ret

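Rather than using this implementation, gcc could be told to use Wegner's
algorithm, which repeatedly clears the lowest set bit and counts the
iterations.  gcc recognizes that loop as a popcount idiom and, on targets that
provide it, lowers it to the `popcnt` instruction.  This would give the same
function the following implementation:

    popcount:
        xor eax, eax
        xor edx, edx
        popcnt eax, edi
        test edi, edi
        cmove eax, edx
        ret

This patch implements the popcount operation in terms of Wegner's algorithm in
all cases.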

Signed-off-by: Andy Sadler <andrewsadler122@gmail.com>
Diffstat (limited to 'tests/ui/patchable-function-entry/patchable-function-entry-flags.rs')
0 files changed, 0 insertions, 0 deletions