about summary refs log tree commit diff
path: root/compiler/rustc_transmute/src/layout/dfa.rs
diff options
context:
space:
mode:
Diffstat (limited to 'compiler/rustc_transmute/src/layout/dfa.rs')
-rw-r--r--compiler/rustc_transmute/src/layout/dfa.rs301
1 files changed, 108 insertions, 193 deletions
diff --git a/compiler/rustc_transmute/src/layout/dfa.rs b/compiler/rustc_transmute/src/layout/dfa.rs
index d1f58157b69..05afa28db31 100644
--- a/compiler/rustc_transmute/src/layout/dfa.rs
+++ b/compiler/rustc_transmute/src/layout/dfa.rs
@@ -1,5 +1,5 @@
 use std::fmt;
-use std::ops::RangeInclusive;
+use std::iter::Peekable;
 use std::sync::atomic::{AtomicU32, Ordering};
 
 use super::{Byte, Ref, Tree, Uninhabited};
@@ -211,15 +211,15 @@ where
             let b_transitions =
                 b_src.and_then(|b_src| b.transitions.get(&b_src)).unwrap_or(&empty_transitions);
 
-            let byte_transitions =
-                a_transitions.byte_transitions.union(&b_transitions.byte_transitions);
-
-            let byte_transitions = byte_transitions.map_states(|(a_dst, b_dst)| {
-                assert!(a_dst.is_some() || b_dst.is_some());
+            let byte_transitions = a_transitions.byte_transitions.union(
+                &b_transitions.byte_transitions,
+                |a_dst, b_dst| {
+                    assert!(a_dst.is_some() || b_dst.is_some());
 
-                queue.enqueue(a_dst, b_dst);
-                mapped((a_dst, b_dst))
-            });
+                    queue.enqueue(a_dst, b_dst);
+                    mapped((a_dst, b_dst))
+                },
+            );
 
             let ref_transitions =
                 a_transitions.ref_transitions.keys().chain(b_transitions.ref_transitions.keys());
@@ -245,18 +245,6 @@ where
         Self { transitions, start, accept }
     }
 
-    pub(crate) fn states_from(
-        &self,
-        state: State,
-        src_validity: RangeInclusive<u8>,
-    ) -> impl Iterator<Item = (Byte, State)> {
-        self.transitions
-            .get(&state)
-            .map(move |t| t.byte_transitions.states_from(src_validity))
-            .into_iter()
-            .flatten()
-    }
-
     pub(crate) fn get_uninit_edge_dst(&self, state: State) -> Option<State> {
         let transitions = self.transitions.get(&state)?;
         transitions.byte_transitions.get_uninit_edge_dst()
@@ -334,95 +322,31 @@ where
 
 use edge_set::EdgeSet;
 mod edge_set {
-    use std::cmp;
-
-    use run::*;
-    use smallvec::{SmallVec, smallvec};
+    use smallvec::SmallVec;
 
     use super::*;
-    mod run {
-        use std::ops::{Range, RangeInclusive};
-
-        use super::*;
-        use crate::layout::Byte;
-
-        /// A logical set of edges.
-        ///
-        /// A `Run` encodes one edge for every byte value in `start..=end`
-        /// pointing to `dst`.
-        #[derive(Eq, PartialEq, Copy, Clone, Debug)]
-        pub(super) struct Run<S> {
-            // `start` and `end` are both inclusive (ie, closed) bounds, as this
-            // is required in order to be able to store 0..=255. We provide
-            // setters and getters which operate on closed/open ranges, which
-            // are more intuitive and easier for performing offset math.
-            start: u8,
-            end: u8,
-            pub(super) dst: S,
-        }
-
-        impl<S> Run<S> {
-            pub(super) fn new(range: RangeInclusive<u8>, dst: S) -> Self {
-                Self { start: *range.start(), end: *range.end(), dst }
-            }
-
-            pub(super) fn from_inclusive_exclusive(range: Range<u16>, dst: S) -> Self {
-                Self {
-                    start: range.start.try_into().unwrap(),
-                    end: (range.end - 1).try_into().unwrap(),
-                    dst,
-                }
-            }
-
-            pub(super) fn contains(&self, idx: u16) -> bool {
-                idx >= u16::from(self.start) && idx <= u16::from(self.end)
-            }
-
-            pub(super) fn as_inclusive_exclusive(&self) -> (u16, u16) {
-                (u16::from(self.start), u16::from(self.end) + 1)
-            }
-
-            pub(super) fn as_byte(&self) -> Byte {
-                Byte::new(self.start..=self.end)
-            }
 
-            pub(super) fn map_state<SS>(self, f: impl FnOnce(S) -> SS) -> Run<SS> {
-                let Run { start, end, dst } = self;
-                Run { start, end, dst: f(dst) }
-            }
-
-            /// Produces a new `Run` whose lower bound is the greater of
-            /// `self`'s existing lower bound and `lower_bound`.
-            pub(super) fn clamp_lower(self, lower_bound: u8) -> Self {
-                let Run { start, end, dst } = self;
-                Run { start: cmp::max(start, lower_bound), end, dst }
-            }
-        }
-    }
-
-    /// The set of outbound byte edges associated with a DFA node (not including
-    /// reference edges).
+    /// The set of outbound byte edges associated with a DFA node.
     #[derive(Eq, PartialEq, Clone, Debug)]
     pub(super) struct EdgeSet<S = State> {
-        // A sequence of runs stored in ascending order. Since the graph is a
-        // DFA, these must be non-overlapping with one another.
-        runs: SmallVec<[Run<S>; 1]>,
-        // The edge labeled with the uninit byte, if any.
+        // A sequence of byte edges with contiguous byte values and a common
+        // destination is stored as a single run.
         //
-        // FIXME(@joshlf): Make `State` a `NonZero` so that this is NPO'd.
-        uninit: Option<S>,
+        // Runs are non-empty, non-overlapping, and stored in ascending order.
+        runs: SmallVec<[(Byte, S); 1]>,
     }
 
     impl<S> EdgeSet<S> {
-        pub(crate) fn new(byte: Byte, dst: S) -> Self {
-            match byte.range() {
-                Some(range) => Self { runs: smallvec![Run::new(range, dst)], uninit: None },
-                None => Self { runs: SmallVec::new(), uninit: Some(dst) },
+        pub(crate) fn new(range: Byte, dst: S) -> Self {
+            let mut this = Self { runs: SmallVec::new() };
+            if !range.is_empty() {
+                this.runs.push((range, dst));
             }
+            this
         }
 
         pub(crate) fn empty() -> Self {
-            Self { runs: SmallVec::new(), uninit: None }
+            Self { runs: SmallVec::new() }
         }
 
         #[cfg(test)]
@@ -431,43 +355,23 @@ mod edge_set {
             S: Ord,
         {
             edges.sort();
-            Self {
-                runs: edges
-                    .into_iter()
-                    .map(|(byte, state)| Run::new(byte.range().unwrap(), state))
-                    .collect(),
-                uninit: None,
-            }
+            Self { runs: edges.into() }
         }
 
         pub(crate) fn iter(&self) -> impl Iterator<Item = (Byte, S)>
         where
             S: Copy,
         {
-            self.uninit
-                .map(|dst| (Byte::uninit(), dst))
-                .into_iter()
-                .chain(self.runs.iter().map(|run| (run.as_byte(), run.dst)))
-        }
-
-        pub(crate) fn states_from(
-            &self,
-            byte: RangeInclusive<u8>,
-        ) -> impl Iterator<Item = (Byte, S)>
-        where
-            S: Copy,
-        {
-            // FIXME(@joshlf): Optimize this. A manual scan over `self.runs` may
-            // permit us to more efficiently discard runs which will not be
-            // produced by this iterator.
-            self.iter().filter(move |(o, _)| Byte::new(byte.clone()).transmutable_into(&o))
+            self.runs.iter().copied()
         }
 
         pub(crate) fn get_uninit_edge_dst(&self) -> Option<S>
         where
             S: Copy,
         {
-            self.uninit
+            // Uninit is ordered last.
+            let &(range, dst) = self.runs.last()?;
+            if range.contains_uninit() { Some(dst) } else { None }
         }
 
         pub(crate) fn map_states<SS>(self, mut f: impl FnMut(S) -> SS) -> EdgeSet<SS> {
@@ -478,95 +382,106 @@ mod edge_set {
                 // allocates the correct number of elements once up-front [1].
                 //
                 // [1] https://doc.rust-lang.org/1.85.0/src/alloc/vec/spec_from_iter_nested.rs.html#47
-                runs: self.runs.into_iter().map(|run| run.map_state(&mut f)).collect(),
-                uninit: self.uninit.map(f),
+                runs: self.runs.into_iter().map(|(b, s)| (b, f(s))).collect(),
             }
         }
 
         /// Unions two edge sets together.
         ///
         /// If `u = a.union(b)`, then for each byte value, `u` will have an edge
-        /// with that byte value and with the destination `(Some(_), None)`,
-        /// `(None, Some(_))`, or `(Some(_), Some(_))` depending on whether `a`,
+        /// with that byte value and with the destination `join(Some(_), None)`,
+        /// `join(None, Some(_))`, or `join(Some(_), Some(_))` depending on whether `a`,
         /// `b`, or both have an edge with that byte value.
         ///
         /// If neither `a` nor `b` have an edge with a particular byte value,
         /// then no edge with that value will be present in `u`.
-        pub(crate) fn union(&self, other: &Self) -> EdgeSet<(Option<S>, Option<S>)>
+        pub(crate) fn union(
+            &self,
+            other: &Self,
+            mut join: impl FnMut(Option<S>, Option<S>) -> S,
+        ) -> EdgeSet<S>
         where
             S: Copy,
         {
-            let uninit = match (self.uninit, other.uninit) {
-                (None, None) => None,
-                (s, o) => Some((s, o)),
-            };
-
-            let mut runs = SmallVec::new();
-
-            // Iterate over `self.runs` and `other.runs` simultaneously,
-            // advancing `idx` as we go. At each step, we advance `idx` as far
-            // as we can without crossing a run boundary in either `self.runs`
-            // or `other.runs`.
-
-            // INVARIANT: `idx < s[0].end && idx < o[0].end`.
-            let (mut s, mut o) = (self.runs.as_slice(), other.runs.as_slice());
-            let mut idx = 0u16;
-            while let (Some((s_run, s_rest)), Some((o_run, o_rest))) =
-                (s.split_first(), o.split_first())
-            {
-                let (s_start, s_end) = s_run.as_inclusive_exclusive();
-                let (o_start, o_end) = o_run.as_inclusive_exclusive();
-
-                // Compute `end` as the end of the current run (which starts
-                // with `idx`).
-                let (end, dst) = match (s_run.contains(idx), o_run.contains(idx)) {
-                    // `idx` is in an existing run in both `s` and `o`, so `end`
-                    // is equal to the smallest of the two ends of those runs.
-                    (true, true) => (cmp::min(s_end, o_end), (Some(s_run.dst), Some(o_run.dst))),
-                    // `idx` is in an existing run in `s`, but not in any run in
-                    // `o`. `end` is either the end of the `s` run or the
-                    // beginning of the next `o` run, whichever comes first.
-                    (true, false) => (cmp::min(s_end, o_start), (Some(s_run.dst), None)),
-                    // The inverse of the previous case.
-                    (false, true) => (cmp::min(s_start, o_end), (None, Some(o_run.dst))),
-                    // `idx` is not in a run in either `s` or `o`, so advance it
-                    // to the beginning of the next run.
-                    (false, false) => {
-                        idx = cmp::min(s_start, o_start);
-                        continue;
-                    }
-                };
+            let xs = self.runs.iter().copied();
+            let ys = other.runs.iter().copied();
+            // FIXME(@joshlf): Merge contiguous runs with common destination.
+            EdgeSet { runs: union(xs, ys).map(|(range, (x, y))| (range, join(x, y))).collect() }
+        }
+    }
+}
+
+/// Merges two sorted sequences into one sorted sequence.
+pub(crate) fn union<S: Copy, X: Iterator<Item = (Byte, S)>, Y: Iterator<Item = (Byte, S)>>(
+    xs: X,
+    ys: Y,
+) -> UnionIter<X, Y> {
+    UnionIter { xs: xs.peekable(), ys: ys.peekable() }
+}
+
+pub(crate) struct UnionIter<X: Iterator, Y: Iterator> {
+    xs: Peekable<X>,
+    ys: Peekable<Y>,
+}
+
+// FIXME(jswrenn) we'd likely benefit from specializing try_fold here.
+impl<S: Copy, X: Iterator<Item = (Byte, S)>, Y: Iterator<Item = (Byte, S)>> Iterator
+    for UnionIter<X, Y>
+{
+    type Item = (Byte, (Option<S>, Option<S>));
 
-                // FIXME(@joshlf): If this is contiguous with the previous run
-                // and has the same `dst`, just merge it into that run rather
-                // than adding a new one.
-                runs.push(Run::from_inclusive_exclusive(idx..end, dst));
-                idx = end;
+    fn next(&mut self) -> Option<Self::Item> {
+        use std::cmp::{self, Ordering};
 
-                if idx >= s_end {
-                    s = s_rest;
+        let ret;
+        match (self.xs.peek_mut(), self.ys.peek_mut()) {
+            (None, None) => {
+                ret = None;
+            }
+            (Some(x), None) => {
+                ret = Some((x.0, (Some(x.1), None)));
+                self.xs.next();
+            }
+            (None, Some(y)) => {
+                ret = Some((y.0, (None, Some(y.1))));
+                self.ys.next();
+            }
+            (Some(x), Some(y)) => {
+                let start;
+                let end;
+                let dst;
+                match x.0.start.cmp(&y.0.start) {
+                    Ordering::Less => {
+                        start = x.0.start;
+                        end = cmp::min(x.0.end, y.0.start);
+                        dst = (Some(x.1), None);
+                    }
+                    Ordering::Greater => {
+                        start = y.0.start;
+                        end = cmp::min(x.0.start, y.0.end);
+                        dst = (None, Some(y.1));
+                    }
+                    Ordering::Equal => {
+                        start = x.0.start;
+                        end = cmp::min(x.0.end, y.0.end);
+                        dst = (Some(x.1), Some(y.1));
+                    }
                 }
-                if idx >= o_end {
-                    o = o_rest;
+                ret = Some((Byte { start, end }, dst));
+                if start == x.0.start {
+                    x.0.start = end;
+                }
+                if start == y.0.start {
+                    y.0.start = end;
+                }
+                if x.0.is_empty() {
+                    self.xs.next();
+                }
+                if y.0.is_empty() {
+                    self.ys.next();
                 }
             }
-
-            // At this point, either `s` or `o` have been exhausted, so the
-            // remaining elements in the other slice are guaranteed to be
-            // non-overlapping. We can add all remaining runs to `runs` with no
-            // further processing.
-            if let Ok(idx) = u8::try_from(idx) {
-                let (slc, map) = if !s.is_empty() {
-                    let map: fn(_) -> _ = |st| (Some(st), None);
-                    (s, map)
-                } else {
-                    let map: fn(_) -> _ = |st| (None, Some(st));
-                    (o, map)
-                };
-                runs.extend(slc.iter().map(|run| run.clamp_lower(idx).map_state(map)));
-            }
-
-            EdgeSet { runs, uninit }
         }
+        ret
     }
 }