[librsvg/rustification] path_parser.rs: Start implementing a recursive descent parser



commit 19d94f39f6eef183253d414d8eb037cb6112b7dc
Author: Federico Mena Quintero <federico gnome org>
Date:   Fri Nov 4 07:17:09 2016 -0600

    path_parser.rs: Start implementing a recursive descent parser
    
    The current parser in C works fine for most cases of SVG in the wild,
    but it doesn't fully comply with the BNF specified in the SVG spec.
    We'll write a new parser for this.

 rust/src/path_parser.rs |  499 +++++++++++++++++++++++++++++++++++++++++++++--
 1 files changed, 487 insertions(+), 12 deletions(-)
---
diff --git a/rust/src/path_parser.rs b/rust/src/path_parser.rs
index 20e00f7..bd45572 100644
--- a/rust/src/path_parser.rs
+++ b/rust/src/path_parser.rs
@@ -1,10 +1,388 @@
+use std::str;
+use std::str::Chars;
+use std::iter::Enumerate;
 use path_builder::*;
 
 extern crate cairo;
 
+struct Parser<'external> {
+    path_str: &'external str,
+    chars_enumerator: Enumerate<Chars<'external>>,
+    lookahead: Option <char>, /* None if we are at EOF */
+    current_pos: usize,
+
+    builder: &'external mut RsvgPathBuilder,
+
+    error_message: &'static str,
+    has_error: bool,
+
+    current_x: f64,
+    current_y: f64
+}
+
+impl<'external> Parser<'external> {
+    pub fn new (builder: &'external mut RsvgPathBuilder, path_str: &'external str) -> Parser<'external> {
+        Parser {
+            path_str: path_str,
+            chars_enumerator: path_str.chars ().enumerate (),
+            lookahead: None,
+            current_pos: 0,
+
+            builder: builder,
+
+            error_message: "",
+            has_error: false,
+
+            current_x: 0.0,
+            current_y: 0.0
+        }
+    }
+
+    pub fn parse (&mut self) -> bool {
+        self.getchar ();
+
+        return self.optional_whitespace () &&
+            self.moveto_drawto_command_groups () && 
+            self.optional_whitespace ();
+    }
+
+    fn getchar (&mut self) {
+        if let Some ((pos, c)) = self.chars_enumerator.next () {
+            self.lookahead = Some (c);
+            self.current_pos = pos;
+        } else {
+            self.lookahead = None;
+            self.current_pos += 1; /* this is EOF; point just past the end of the string */
+        }
+    }
+
+    fn error (&mut self, message: &'static str) -> bool {
+        self.error_message = message;
+        self.has_error = true;
+        false
+    }
+
+    fn match_char (&mut self, c: char) -> bool {
+        if let Some (x) = self.lookahead {
+            if c == x {
+                self.getchar ();
+                return true;
+            }
+        }
+
+        false
+    }
+
+    fn whitespace (&mut self) -> bool {
+        if let Some (c) = self.lookahead {
+            if c.is_whitespace () {
+                assert! (self.match_char (c));
+
+                while let Some (c) = self.lookahead {
+                    if c.is_whitespace () {
+                        assert! (self.match_char (c));
+                        continue;
+                    } else {
+                        break;
+                    }
+                }
+
+                return true;
+            } else {
+                return false;
+            }
+        }
+
+        false
+    }
+
+    fn optional_whitespace (&mut self) -> bool {
+        self.whitespace ();
+        true
+    }
+
+    fn optional_comma_whitespace (&mut self) -> bool {
+        assert! (self.optional_whitespace ());
+        if self.match_char (',') {
+            self.optional_whitespace ()
+        } else {
+            true
+        }
+    }
+
+    fn lookahead_is_digit (&self, d: &mut char) -> bool {
+        if let Some (c) = self.lookahead {
+            if c.is_digit (10) {
+                *d = c;
+                return true;
+            }
+        }
+
+        false
+    }
+
+    fn number (&mut self) -> Option <f64> {
+        let mut has_sign: bool;
+        let mut value: f64;
+        let mut sign: f64;
+        let mut exponent_sign: f64;
+        let mut exponent: f64;
+
+        has_sign = false;
+        sign = 1.0;
+        value = 0.0;
+        exponent_sign = 1.0;
+        exponent = 0.0;
+
+        if self.match_char ('+') {
+            sign = 1.0;
+            has_sign = true;
+        } else if self.match_char ('-') {
+            sign = -1.0;
+            has_sign = true;
+        }
+
+        let mut c: char = ' ';
+
+        if self.lookahead_is_digit (&mut c) {
+            /* Integer part */
+
+            while self.lookahead_is_digit (&mut c) {
+                value = value * 10.0 + char_to_digit (c) as f64;
+
+                assert! (self.match_char (c));
+            }
+
+            /* Fractional part */
+
+            if self.match_char ('.') {
+                let mut fraction: f64 = 1.0;
+
+                let mut c: char = ' ';
+
+                while self.lookahead_is_digit (&mut c) {
+                    fraction = fraction / 10.0;
+                    value += fraction * char_to_digit (c) as f64;
+
+                    assert! (self.match_char (c));
+                }
+            }
+
+            if self.match_char ('E') || self.match_char ('e') {
+                /* exponent sign */
+
+                if self.match_char ('+') {
+                    exponent_sign = 1.0;
+                } else if self.match_char ('-') {
+                    exponent_sign = -1.0;
+                }
+
+                /* exponent */
+
+                let mut c: char = ' ';
+
+                if self.lookahead_is_digit (&mut c) {
+                    while self.lookahead_is_digit (&mut c) {
+                        exponent = exponent * 10.0 + char_to_digit (c) as f64;
+
+                        assert! (self.match_char (c));
+                    }
+                } else {
+                    self.error ("Expected digits for exponent");
+                    return None;
+                }
+            }
+
+            Some (value * sign * 10.0f64.powf (exponent * exponent_sign))
+        } else {
+            if has_sign {
+                self.error ("Expected number after sign");
+            }
+
+            None
+        }
+    }
+
+    fn coordinate_pair (&mut self) -> Option<(f64, f64)> {
+        if let Some (num1) = self.number () {
+            assert! (self.optional_comma_whitespace ());
+
+            if let Some (num2) = self.number () {
+                return Some ((num1, num2));
+            } else {
+                self.error ("Expected second coordinate of coordinate pair");
+                return None
+            }
+        }
+
+        None
+    }
+
+    fn lookahead_is (&self, c: char) -> bool {
+        if let Some (x) = self.lookahead {
+            if x == c {
+                return true;
+            }
+        }
+
+        false
+    }
+
+    fn emit_line_to (&mut self, absolute: bool, x: f64, y: f64) {
+        if absolute {
+            self.current_x = x;
+            self.current_y = y;
+        } else {
+            self.current_x += x;
+            self.current_y += y;
+        }
+
+        self.builder.line_to (self.current_x, self.current_y);
+        println! ("emitting lineto {} {}", self.current_x, self.current_y);
+    }
+
+    fn lineto_argument_sequence (&mut self, absolute: bool) -> bool {
+        if let Some ((x, y)) = self.coordinate_pair () {
+            self.emit_line_to (absolute, x, y);
+
+            self.whitespace ();
+
+            if self.lookahead_is (',') {
+                assert! (self.match_char (','));
+                assert! (self.optional_whitespace ());
+
+                if !self.lineto_argument_sequence (absolute) {
+                    self.error ("Expected coordinate pair after comma");
+                    return false;
+                }
+            }
+
+            self.lineto_argument_sequence (absolute);
+            true
+        } else {
+            false
+        }
+    }
+
+    fn emit_move_to (&mut self, absolute: bool, x: f64, y: f64) {
+        if absolute {
+            self.current_x = x;
+            self.current_y = y;
+        } else {
+            self.current_x += x;
+            self.current_y += y;
+        }
+
+        self.builder.move_to (self.current_x, self.current_y);
+        println! ("emitting moveto {} {}", self.current_x, self.current_y);
+    }
+
+    fn moveto_argument_sequence (&mut self, absolute: bool, is_initial_moveto: bool) -> bool {
+        if let Some ((x, y)) = self.coordinate_pair () {
+            if is_initial_moveto {
+                self.emit_move_to (true, x, y);
+            } else {
+                self.emit_move_to (absolute, x, y);
+            }
+
+            self.whitespace ();
+
+            if self.lookahead_is (',') {
+                assert! (self.match_char (','));
+                assert! (self.optional_whitespace ());
+
+                if !self.lineto_argument_sequence (absolute) {
+                    self.error ("Expected coordinate pair after comma");
+                    return false;
+                }
+            }
+
+            self.lineto_argument_sequence (absolute);
+            true
+        } else {
+            self.error ("Expected coordinate pair after moveto")
+        }
+    }
+
+    fn moveto (&mut self, is_initial_moveto: bool) -> bool {
+        if self.lookahead_is ('M') || self.lookahead_is ('m') { 
+            let absolute: bool;
+
+            if self.match_char ('M') {
+                absolute = true;
+            } else {
+                assert! (self.match_char ('m'));
+                absolute = false;
+            }
+
+            return self.optional_whitespace () &&
+                self.moveto_argument_sequence (absolute, is_initial_moveto);
+        }
+
+        false
+    }
+
+    fn moveto_drawto_command_group (&mut self, is_initial_moveto: bool) -> bool {
+        if self.moveto (is_initial_moveto) {
+            return self.optional_whitespace () &&
+                true; // FIXME self.optional_drawto_commands ();
+        } else {
+            false
+        }
+    }
+
+    fn moveto_drawto_command_groups (&mut self) -> bool {
+        if self.moveto_drawto_command_group (true) {
+            loop {
+                self.optional_whitespace ();
+                if !self.moveto_drawto_command_group (false) {
+                    break;
+                }
+            }
+
+            true
+        } else {
+            self.error ("Expected moveto command")
+        }
+    }
+}
+
+fn char_to_digit (c: char) -> i32 {
+    c as i32 - '0' as i32
+}
+
+
+
+fn print_error (parser: &Parser) {
+    let prefix = "Error in \"";
+
+    println! ("");
+
+    println! ("{}{}\"", prefix, &parser.path_str);
+
+    for _ in 0 .. (prefix.len() + parser.current_pos) {
+        print! (" ");
+    }
+
+    println! ("^ pos {}", parser.current_pos);
+    println! ("{}", &parser.error_message);
+}
 
 fn parse_path (path_str: &str) -> RsvgPathBuilder {
-    let builder = RsvgPathBuilder::new ();
+    let mut builder = RsvgPathBuilder::new ();
+
+    {
+        let mut parser = Parser::new (&mut builder, path_str);
+        if parser.parse () {
+            /* all okay */
+        } else {
+            print_error (&parser);
+
+            /* FIXME: we aren't returning errors at all.  Just return the
+             * parsed path up to here, per the spec.
+             */
+        }
+    }
 
     builder
 }
@@ -12,12 +390,15 @@ fn parse_path (path_str: &str) -> RsvgPathBuilder {
 
 #[cfg(test)]
 mod tests {
-    use super::*;
-    use path_builder::*;
     extern crate cairo;
 
     fn path_segment_vectors_are_equal (a: &Vec<cairo::PathSegment>,
                                        b: &Vec<cairo::PathSegment>) -> bool {
+
+        if a.len() != b.len () {
+            return false;
+        }
+
         if a.len () == 0 && b.len () == 0 {
             return true;
         }
@@ -28,35 +409,129 @@ mod tests {
             if let Some ((seg1, seg2)) = iter.next () {
                 match *seg1 {
                     cairo::PathSegment::MoveTo ((x, y)) => {
-                        if let cairo::PathSegment::MoveTo ((ox, oy)) = *seg2 { return (x, y) == (ox, oy); }
+                        if let cairo::PathSegment::MoveTo ((ox, oy)) = *seg2 {
+                            println! ("{} {} {} {}", x, y, ox, oy);
+                            if (x, y) != (ox, oy) {
+                                return false;
+                            }
+                        } else {
+                            return false;
+                        }
                     },
 
                     cairo::PathSegment::LineTo ((x, y)) => {
-                        if let cairo::PathSegment::LineTo ((ox, oy)) = *seg2 { return (x, y) == (ox, oy); }
+                        if let cairo::PathSegment::LineTo ((ox, oy)) = *seg2 {
+                            println! ("{} {} {} {}", x, y, ox, oy);
+                            if (x, y) != (ox, oy) {
+                                return false;
+                            }
+                        } else {
+                            return false;
+                        }
                     },
 
                     cairo::PathSegment::CurveTo ((x2, y2), (x3, y3), (x4, y4)) => {
                         if let cairo::PathSegment::CurveTo ((ox2, oy2), (ox3, oy3), (ox4, oy4)) = *seg2 {
-                            return (ox2, oy2, ox3, oy3, ox4, oy4) == (x2, y2, x3, y3, x4, y4);
+                            if (ox2, oy2, ox3, oy3, ox4, oy4) != (x2, y2, x3, y3, x4, y4) {
+                                return false;
+                            }
+                        } else {
+                            return false;
                         }
                     },
 
                     cairo::PathSegment::ClosePath => {
-                        if let cairo::PathSegment::ClosePath = *seg2 { return true; }
+                        if let cairo::PathSegment::ClosePath = *seg2 {
+                            /* okay */
+                        } else {
+                            return false;
+                        }
                     }
                 }
             } else {
-                return false;
+                break;
             }
         }
+
+        true
+    }
+
+    fn test_parser (path_str: &str,
+                    expected_segments: &Vec<cairo::PathSegment>) {
+        let builder = super::parse_path (path_str);
+        let segments = builder.get_path_segments ();
+
+        assert! (path_segment_vectors_are_equal (expected_segments, segments));
+    }
+
+    fn moveto (x: f64, y: f64) -> cairo::PathSegment {
+        cairo::PathSegment::MoveTo ((x, y))
+    }
+
+    fn lineto (x: f64, y: f64) -> cairo::PathSegment {
+        cairo::PathSegment::LineTo ((x, y))
+    }
+
+    fn curveto (x2: f64, y2: f64, x3: f64, y3: f64, x4: f64, y4: f64) -> cairo::PathSegment {
+        cairo::PathSegment::CurveTo ((x2, y2), (x3, y3), (x4, y4))
+    }
+
+    fn closepath () -> cairo::PathSegment {
+        cairo::PathSegment::ClosePath
     }
 
     #[test]
     fn path_parser_handles_empty_data () {
-        let builder = super::parse_path ("");
-        let segments = builder.get_path_segments ();
-        let expected_segments = Vec::<cairo::PathSegment>::new ();
+        test_parser ("",
+                     &Vec::<cairo::PathSegment>::new ());
+    }
+
+    #[test]
+    fn path_parser_handles_single_moveto () {
+        test_parser ("M 10 20",
+                     &vec![
+                         moveto (10.0, 20.0)
+                     ]);
 
-        assert! (path_segment_vectors_are_equal (&expected_segments, segments));
+        test_parser ("M10,20",
+                     &vec![
+                         moveto (10.0, 20.0)
+                     ]);
+
+        test_parser ("M10 20",
+                     &vec![
+                         moveto (10.0, 20.0)
+                     ]);
+
+        test_parser ("    M10,20     ",
+                     &vec![
+                         moveto (10.0, 20.0)
+                     ]);
+    }
+
+    #[test]
+    fn path_parser_handles_relative_moveto () {
+        test_parser ("m10 20",
+                     &vec![
+                         moveto (10.0, 20.0)
+                     ]);
+    }
+
+    #[test]
+    fn path_parser_handles_absolute_moveto_with_implicit_linetos () {
+        test_parser ("M10 20 30 40",
+                     &vec![
+                         moveto (10.0, 20.0),
+                         lineto (30.0, 40.0)
+                     ]);
+    }
+
+    #[test]
+    fn path_parser_handles_relative_moveto_with_implicit_linetos () {
+        test_parser ("m10 20 30 40",
+                     &vec![
+                         moveto (10.0, 20.0),
+                         lineto (40.0, 60.0)
+                     ]);
     }
 }


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]