about summary refs log tree commit diff
path: root/src/librustdoc/html/render/search_index/encode.rs
blob: d15e13a2d3742f7d6a6112e030e58a0fbb3253e1 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
pub(crate) fn write_signed_vlqhex_to_string(n: i32, string: &mut String) {
    let (sign, magnitude): (bool, u32) =
        if n >= 0 { (false, n.try_into().unwrap()) } else { (true, (-n).try_into().unwrap()) };
    // zig-zag encoding
    let value: u32 = (magnitude << 1) | (if sign { 1 } else { 0 });
    // Self-terminating hex use capital letters for everything but the
    // least significant digit, which is lowercase. For example, decimal 17
    // would be `` Aa `` if zig-zag encoding weren't used.
    //
    // Zig-zag encoding, however, stores the sign bit as the last bit.
    // This means, in the last hexit, 1 is actually `c`, -1 is `b`
    // (`a` is the imaginary -0), and, because all the bits are shifted
    // by one, `` A` `` is actually 8 and `` Aa `` is -8.
    //
    // https://rust-lang.github.io/rustc-dev-guide/rustdoc-internals/search.html
    // describes the encoding in more detail.
    let mut shift: u32 = 28;
    let mut mask: u32 = 0xF0_00_00_00;
    // first skip leading zeroes
    while shift < 32 {
        let hexit = (value & mask) >> shift;
        if hexit != 0 || shift == 0 {
            break;
        }
        shift = shift.wrapping_sub(4);
        mask >>= 4;
    }
    // now write the rest
    while shift < 32 {
        let hexit = (value & mask) >> shift;
        let hex = char::try_from(if shift == 0 { '`' } else { '@' } as u32 + hexit).unwrap();
        string.push(hex);
        shift = shift.wrapping_sub(4);
        mask >>= 4;
    }
}

pub fn read_signed_vlqhex_from_string(string: &[u8]) -> Option<(i32, usize)> {
    let mut n = 0i32;
    let mut i = 0;
    while let Some(&c) = string.get(i) {
        i += 1;
        n = (n << 4) | i32::from(c & 0xF);
        if c >= 96 {
            // zig-zag encoding
            let (sign, magnitude) = (n & 1, n >> 1);
            let value = if sign == 0 { 1 } else { -1 } * magnitude;
            return Some((value, i));
        }
    }
    None
}

pub fn write_postings_to_string(postings: &[Vec<u32>], buf: &mut Vec<u8>) {
    for list in postings {
        if list.is_empty() {
            buf.push(0);
            continue;
        }
        let len_before = buf.len();
        stringdex::internals::encode::write_bitmap_to_bytes(&list, &mut *buf).unwrap();
        let len_after = buf.len();
        if len_after - len_before > 1 + (4 * list.len()) && list.len() < 0x3a {
            buf.truncate(len_before);
            buf.push(list.len() as u8);
            for &item in list {
                buf.push(item as u8);
                buf.push((item >> 8) as u8);
                buf.push((item >> 16) as u8);
                buf.push((item >> 24) as u8);
            }
        }
    }
}

pub fn read_postings_from_string(postings: &mut Vec<Vec<u32>>, mut buf: &[u8]) {
    use stringdex::internals::decode::RoaringBitmap;
    while let Some(&c) = buf.get(0) {
        if c < 0x3a {
            buf = &buf[1..];
            let mut slot = Vec::new();
            for _ in 0..c {
                slot.push(
                    (buf[0] as u32)
                        | ((buf[1] as u32) << 8)
                        | ((buf[2] as u32) << 16)
                        | ((buf[3] as u32) << 24),
                );
                buf = &buf[4..];
            }
            postings.push(slot);
        } else {
            let (bitmap, consumed_bytes_len) =
                RoaringBitmap::from_bytes(buf).unwrap_or_else(|| (RoaringBitmap::default(), 0));
            assert_ne!(consumed_bytes_len, 0);
            postings.push(bitmap.to_vec());
            buf = &buf[consumed_bytes_len..];
        }
    }
}