regex_syntax/
error.rs

1use std::cmp;
2use std::error;
3use std::fmt;
4use std::result;
5
6use ast;
7use hir;
8
9/// A type alias for dealing with errors returned by this crate.
10pub type Result<T> = result::Result<T, Error>;
11
12/// This error type encompasses any error that can be returned by this crate.
13#[derive(Clone, Debug, Eq, PartialEq)]
14pub enum Error {
15    /// An error that occurred while translating concrete syntax into abstract
16    /// syntax (AST).
17    Parse(ast::Error),
18    /// An error that occurred while translating abstract syntax into a high
19    /// level intermediate representation (HIR).
20    Translate(hir::Error),
21    /// Hints that destructuring should not be exhaustive.
22    ///
23    /// This enum may grow additional variants, so this makes sure clients
24    /// don't count on exhaustive matching. (Otherwise, adding a new variant
25    /// could break existing code.)
26    #[doc(hidden)]
27    __Nonexhaustive,
28}
29
30impl From<ast::Error> for Error {
31    fn from(err: ast::Error) -> Error {
32        Error::Parse(err)
33    }
34}
35
36impl From<hir::Error> for Error {
37    fn from(err: hir::Error) -> Error {
38        Error::Translate(err)
39    }
40}
41
42impl error::Error for Error {
43    fn description(&self) -> &str {
44        match *self {
45            Error::Parse(ref x) => x.description(),
46            Error::Translate(ref x) => x.description(),
47            _ => unreachable!(),
48        }
49    }
50}
51
52impl fmt::Display for Error {
53    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
54        match *self {
55            Error::Parse(ref x) => x.fmt(f),
56            Error::Translate(ref x) => x.fmt(f),
57            _ => unreachable!(),
58        }
59    }
60}
61
62/// A helper type for formatting nice error messages.
63///
64/// This type is responsible for reporting regex parse errors in a nice human
65/// readable format. Most of its complexity is from interspersing notational
66/// markers pointing out the position where an error occurred.
67#[derive(Debug)]
68pub struct Formatter<'e, E: 'e> {
69    /// The original regex pattern in which the error occurred.
70    pattern: &'e str,
71    /// The error kind. It must impl fmt::Display.
72    err: &'e E,
73    /// The primary span of the error.
74    span: &'e ast::Span,
75    /// An auxiliary and optional span, in case the error needs to point to
76    /// two locations (e.g., when reporting a duplicate capture group name).
77    aux_span: Option<&'e ast::Span>,
78}
79
80impl<'e> From<&'e ast::Error> for Formatter<'e, ast::ErrorKind> {
81    fn from(err: &'e ast::Error) -> Self {
82        Formatter {
83            pattern: err.pattern(),
84            err: err.kind(),
85            span: err.span(),
86            aux_span: err.auxiliary_span(),
87        }
88    }
89}
90
91impl<'e> From<&'e hir::Error> for Formatter<'e, hir::ErrorKind> {
92    fn from(err: &'e hir::Error) -> Self {
93        Formatter {
94            pattern: err.pattern(),
95            err: err.kind(),
96            span: err.span(),
97            aux_span: None,
98        }
99    }
100}
101
102impl<'e, E: fmt::Display> fmt::Display for Formatter<'e, E> {
103    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
104        let spans = Spans::from_formatter(self);
105        if self.pattern.contains('\n') {
106            let divider = repeat_char('~', 79);
107
108            writeln!(f, "regex parse error:")?;
109            writeln!(f, "{}", divider)?;
110            let notated = spans.notate();
111            write!(f, "{}", notated)?;
112            writeln!(f, "{}", divider)?;
113            // If we have error spans that cover multiple lines, then we just
114            // note the line numbers.
115            if !spans.multi_line.is_empty() {
116                let mut notes = vec![];
117                for span in &spans.multi_line {
118                    notes.push(format!(
119                        "on line {} (column {}) through line {} (column {})",
120                        span.start.line,
121                        span.start.column,
122                        span.end.line,
123                        span.end.column - 1
124                    ));
125                }
126                writeln!(f, "{}", notes.join("\n"))?;
127            }
128            write!(f, "error: {}", self.err)?;
129        } else {
130            writeln!(f, "regex parse error:")?;
131            let notated = Spans::from_formatter(self).notate();
132            write!(f, "{}", notated)?;
133            write!(f, "error: {}", self.err)?;
134        }
135        Ok(())
136    }
137}
138
139/// This type represents an arbitrary number of error spans in a way that makes
140/// it convenient to notate the regex pattern. ("Notate" means "point out
141/// exactly where the error occurred in the regex pattern.")
142///
143/// Technically, we can only ever have two spans given our current error
144/// structure. However, after toiling with a specific algorithm for handling
145/// two spans, it became obvious that an algorithm to handle an arbitrary
146/// number of spans was actually much simpler.
147struct Spans<'p> {
148    /// The original regex pattern string.
149    pattern: &'p str,
150    /// The total width that should be used for line numbers. The width is
151    /// used for left padding the line numbers for alignment.
152    ///
153    /// A value of `0` means line numbers should not be displayed. That is,
154    /// the pattern is itself only one line.
155    line_number_width: usize,
156    /// All error spans that occur on a single line. This sequence always has
157    /// length equivalent to the number of lines in `pattern`, where the index
158    /// of the sequence represents a line number, starting at `0`. The spans
159    /// in each line are sorted in ascending order.
160    by_line: Vec<Vec<ast::Span>>,
161    /// All error spans that occur over one or more lines. That is, the start
162    /// and end position of the span have different line numbers. The spans are
163    /// sorted in ascending order.
164    multi_line: Vec<ast::Span>,
165}
166
167impl<'p> Spans<'p> {
168    /// Build a sequence of spans from a formatter.
169    fn from_formatter<'e, E: fmt::Display>(
170        fmter: &'p Formatter<'e, E>,
171    ) -> Spans<'p> {
172        let mut line_count = fmter.pattern.lines().count();
173        // If the pattern ends with a `\n` literal, then our line count is
174        // off by one, since a span can occur immediately after the last `\n`,
175        // which is consider to be an additional line.
176        if fmter.pattern.ends_with('\n') {
177            line_count += 1;
178        }
179        let line_number_width =
180            if line_count <= 1 { 0 } else { line_count.to_string().len() };
181        let mut spans = Spans {
182            pattern: &fmter.pattern,
183            line_number_width: line_number_width,
184            by_line: vec![vec![]; line_count],
185            multi_line: vec![],
186        };
187        spans.add(fmter.span.clone());
188        if let Some(span) = fmter.aux_span {
189            spans.add(span.clone());
190        }
191        spans
192    }
193
194    /// Add the given span to this sequence, putting it in the right place.
195    fn add(&mut self, span: ast::Span) {
196        // This is grossly inefficient since we sort after each add, but right
197        // now, we only ever add two spans at most.
198        if span.is_one_line() {
199            let i = span.start.line - 1; // because lines are 1-indexed
200            self.by_line[i].push(span);
201            self.by_line[i].sort();
202        } else {
203            self.multi_line.push(span);
204            self.multi_line.sort();
205        }
206    }
207
208    /// Notate the pattern string with carents (`^`) pointing at each span
209    /// location. This only applies to spans that occur within a single line.
210    fn notate(&self) -> String {
211        let mut notated = String::new();
212        for (i, line) in self.pattern.lines().enumerate() {
213            if self.line_number_width > 0 {
214                notated.push_str(&self.left_pad_line_number(i + 1));
215                notated.push_str(": ");
216            } else {
217                notated.push_str("    ");
218            }
219            notated.push_str(line);
220            notated.push('\n');
221            if let Some(notes) = self.notate_line(i) {
222                notated.push_str(&notes);
223                notated.push('\n');
224            }
225        }
226        notated
227    }
228
229    /// Return notes for the line indexed at `i` (zero-based). If there are no
230    /// spans for the given line, then `None` is returned. Otherwise, an
231    /// appropriately space padded string with correctly positioned `^` is
232    /// returned, accounting for line numbers.
233    fn notate_line(&self, i: usize) -> Option<String> {
234        let spans = &self.by_line[i];
235        if spans.is_empty() {
236            return None;
237        }
238        let mut notes = String::new();
239        for _ in 0..self.line_number_padding() {
240            notes.push(' ');
241        }
242        let mut pos = 0;
243        for span in spans {
244            for _ in pos..(span.start.column - 1) {
245                notes.push(' ');
246                pos += 1;
247            }
248            let note_len = span.end.column.saturating_sub(span.start.column);
249            for _ in 0..cmp::max(1, note_len) {
250                notes.push('^');
251                pos += 1;
252            }
253        }
254        Some(notes)
255    }
256
257    /// Left pad the given line number with spaces such that it is aligned with
258    /// other line numbers.
259    fn left_pad_line_number(&self, n: usize) -> String {
260        let n = n.to_string();
261        let pad = self.line_number_width.checked_sub(n.len()).unwrap();
262        let mut result = repeat_char(' ', pad);
263        result.push_str(&n);
264        result
265    }
266
267    /// Return the line number padding beginning at the start of each line of
268    /// the pattern.
269    ///
270    /// If the pattern is only one line, then this returns a fixed padding
271    /// for visual indentation.
272    fn line_number_padding(&self) -> usize {
273        if self.line_number_width == 0 {
274            4
275        } else {
276            2 + self.line_number_width
277        }
278    }
279}
280
/// Returns a string made up of `count` copies of `c`.
fn repeat_char(c: char, count: usize) -> String {
    (0..count).map(|_| c).collect()
}
284
#[cfg(test)]
mod tests {
    use ast::parse::Parser;

    /// Parses `pattern`, which is expected to be rejected, and checks that
    /// the rendered error equals `expected_msg` once the latter has been
    /// trimmed of surrounding whitespace.
    fn assert_error_message(pattern: &str, expected_msg: &str) {
        let err = match Parser::new().parse(pattern) {
            Err(err) => err,
            Ok(_) => panic!("regex should not have parsed"),
        };
        assert_eq!(err.to_string(), expected_msg.trim());
    }

    // See: https://github.com/rust-lang/regex/issues/464
    #[test]
    fn regression_464() {
        let err = Parser::new().parse("a{\n").unwrap_err();
        // All we check here is that rendering the error does not panic.
        let rendered = err.to_string();
        assert!(!rendered.is_empty());
    }

    // See: https://github.com/rust-lang/regex/issues/545
    #[test]
    fn repetition_quantifier_expects_a_valid_decimal() {
        let pattern = r"\\u{[^}]*}";
        let expected = r#"
regex parse error:
    \\u{[^}]*}
        ^
error: repetition quantifier expects a valid decimal
"#;
        assert_error_message(pattern, expected);
    }
}