src/libcore/char.rs


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150

/*
Module: char

Utilities for manipulating the char type
*/

/*
Function: is_whitespace

Indicates whether a character is whitespace.

The characters treated as whitespace are:

  - the controls tab (U+0009), line feed (U+000A), line tabulation
    (U+000B), form feed (U+000C), carriage return (U+000D) and next
    line (U+0085)
  - space (U+0020) and no-break space (U+00A0)
  - ogham space mark (U+1680) and mongolian vowel separator (U+180E)
  - the fixed-width spaces U+2000 through U+200A
  - line separator (U+2028) and paragraph separator (U+2029)
  - narrow no-break space (U+202F), medium mathematical space (U+205F)
    and ideographic space (U+3000)

Parameters:
  c - the character to classify

Returns:
  true if `c` is one of the characters listed above, false otherwise
*/
pure fn is_whitespace(c: char) -> bool {
    // Control characters
    const ch_character_tabulation: char = '\u0009';
    const ch_line_feed: char = '\u000a';
    const ch_line_tabulation: char = '\u000b';
    const ch_form_feed: char = '\u000c';
    const ch_carriage_return: char = '\u000d';
    const ch_next_line: char = '\u0085';

    // Space characters
    const ch_space: char = '\u0020';
    const ch_no_break_space: char = '\u00a0';
    const ch_ogham_space_mark: char = '\u1680';
    const ch_mongolian_vowel_sep: char = '\u180e';
    const ch_en_quad: char = '\u2000';
    const ch_em_quad: char = '\u2001';
    const ch_en_space: char = '\u2002';
    const ch_em_space: char = '\u2003';
    const ch_three_per_em_space: char = '\u2004';
    const ch_four_per_em_space: char = '\u2005';
    const ch_six_per_em_space: char = '\u2006';
    const ch_figure_space: char = '\u2007';
    const ch_punctuation_space: char = '\u2008';
    const ch_thin_space: char = '\u2009';
    const ch_hair_space: char = '\u200a';
    const ch_narrow_no_break_space: char = '\u202f';
    const ch_medium_mathematical_space: char = '\u205f';
    const ch_ideographic_space: char = '\u3000';

    // Line and paragraph separators
    const ch_line_separator: char = '\u2028';
    const ch_paragraph_separator: char = '\u2029';

    // Every constant declared above is tested exactly once, in
    // declaration order, so a missing or duplicated entry is easy to spot.
    ret c == ch_character_tabulation ||
        c == ch_line_feed ||
        c == ch_line_tabulation ||
        c == ch_form_feed ||
        c == ch_carriage_return ||
        c == ch_next_line ||
        c == ch_space ||
        c == ch_no_break_space ||
        c == ch_ogham_space_mark ||
        c == ch_mongolian_vowel_sep ||
        c == ch_en_quad ||
        c == ch_em_quad ||
        c == ch_en_space ||
        c == ch_em_space ||
        c == ch_three_per_em_space ||
        c == ch_four_per_em_space ||
        c == ch_six_per_em_space ||
        c == ch_figure_space ||
        c == ch_punctuation_space ||
        c == ch_thin_space ||
        c == ch_hair_space ||
        c == ch_narrow_no_break_space ||
        c == ch_medium_mathematical_space ||
        c == ch_ideographic_space ||
        c == ch_line_separator ||
        c == ch_paragraph_separator;
}

/*
 Function: to_digit

 Convert a char to the numeric value of the digit it represents.

 Parameters:
   c - a char, either '0' to '9', 'a' to 'z' or 'A' to 'Z'

 Returns:
   The value that `maybe_digit` yields for `c`: 0 to 9 for '0' to '9',
   and 10 to 35 for 'a' to 'z' or 'A' to 'Z' (case is ignored, so both
   'a' and 'A' give 10, 'b' and 'B' give 11, etc.)

 Failure:
   Fails if `maybe_digit` returns none for `c`, i.e. if `c` is outside
   all three ranges above
*/
pure fn to_digit(c: char) -> u8 unsafe {
    alt maybe_digit(c) {
      // Not a digit in any supported range: abort the task.
      option::none. { fail; }
      option::some(digit) { digit }
    }
}

/*
 Function: maybe_digit

 Convert a char to the corresponding digit, without failing.

 Parameters:
   c - the char to convert

 Returns:
   some(0) to some(9) for '0' to '9', some(10) to some(35) for 'a' to 'z'
   or 'A' to 'Z' (case is ignored), and none for any other char
*/
pure fn maybe_digit(c: char) -> option::t<u8> {
    // Only used in arms where `c` is known to be an ASCII letter or digit.
    let code = c as u8;
    alt c {
      // Letters continue after the ten decimal digits, hence the +10.
      'A' to 'Z' { option::some(code - ('A' as u8) + 10u8) }
      'a' to 'z' { option::some(code - ('a' as u8) + 10u8) }
      '0' to '9' { option::some(code - ('0' as u8)) }
      _ { option::none }
    }
}

/*
 Function: cmp

 Three-way comparison of two chars.

 Parameters:
  a - the left-hand char
  b - the right-hand char

 Returns:
  -1 if `a` is less than `b`, +1 if `a` is greater than `b`, and 0 if
  the two are equal
*/
pure fn cmp(a: char, b: char) -> int {
    if a < b { ret -1; }
    if a > b { ret 1; }
    // Neither smaller nor larger, so the chars are equal.
    ret 0;
}