[librsvg: 2/5] (#219) Make path parser have a tokenizer/parser
- From: Federico Mena Quintero <federico src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [librsvg: 2/5] (#219) Make path parser have a tokenizer/parser
- Date: Thu, 11 Jun 2020 22:07:22 +0000 (UTC)
commit 111058f9e495a82155a7ac8b0e54646e35530182
Author: Emile Snyder <emile snyder gmail com>
Date: Sun Jun 7 17:03:34 2020 -0700
(#219) Make path parser have a tokenizer/parser
I did try just using chars and str everywhere, and doing parse::<f64>
on slices for the Number tokens; it made the
path_parser::Parser::parse calls take about 2x as long as the original
code. The [u8] version is still slower, but not that much slower.
https://gitlab.gnome.org/GNOME/librsvg/-/issues/219
rsvg_internals/benches/path_parser.rs | 57 ++-
rsvg_internals/src/path_parser.rs | 920 ++++++++++++++++------------------
2 files changed, 475 insertions(+), 502 deletions(-)
---
diff --git a/rsvg_internals/benches/path_parser.rs b/rsvg_internals/benches/path_parser.rs
index d8c892e4..41842eca 100644
--- a/rsvg_internals/benches/path_parser.rs
+++ b/rsvg_internals/benches/path_parser.rs
@@ -3,9 +3,38 @@ extern crate criterion;
use criterion::{black_box, Criterion};
use rsvg_internals::path_builder::PathBuilder;
-use rsvg_internals::path_parser::parse_path_into_builder;
+use rsvg_internals::path_parser::{parse_path_into_builder, Lexer};
static INPUT: &'static str = "M10 20 C 30,40 50 60-70,80,90 100,110 120,130,140";
+// 0 . 1 . 2 . 3 . 4 .
+
+static BYTES: &'static [u8; 49] = b"M10 20 C 30,40 50 60-70,80,90 100,110 120,130,140";
+// 0 . 1 . 2 . 3 . 4 .
+
+static SLICE_EDGES: [(usize, usize); 14] = [
+ (1, 3),
+ (4, 6),
+ (9, 11),
+ (12, 14),
+ (15, 17),
+ (18, 20),
+ (20, 23),
+ (24, 26),
+ (27, 29),
+ (30, 33),
+ (34, 37),
+ (38, 41),
+ (42, 45),
+ (46, 49),
+];
+
+fn lex_path(input: &str) {
+ let lexer = Lexer::new(black_box(INPUT));
+
+ for (_pos, _token) in lexer {
+ //
+ }
+}
fn path_parser(c: &mut Criterion) {
c.bench_function("parse path into builder", |b| {
@@ -16,6 +45,32 @@ fn path_parser(c: &mut Criterion) {
let _ = parse_path_into_builder(&input, &mut builder);
});
});
+
+ c.bench_function("lex str", |b| {
+ let input = black_box(INPUT);
+
+ b.iter(|| {
+ lex_path(input);
+ });
+ });
+
+ // look at how much time *just* the parse::<i32> part of the lexer should be taking...
+ c.bench_function("std i32 parse (bytes)", |b| {
+ let input = black_box(BYTES);
+ let slice_boundaries = black_box(SLICE_EDGES);
+
+ b.iter(|| {
+ for (a, b) in slice_boundaries.iter() {
+ let a: usize = *a;
+ let b: usize = *b;
+ unsafe {
+ let _ = std::str::from_utf8_unchecked(&input[a..b])
+ .parse::<i32>()
+ .unwrap();
+ }
+ }
+ });
+ });
}
criterion_group!(benches, path_parser);
diff --git a/rsvg_internals/src/path_parser.rs b/rsvg_internals/src/path_parser.rs
index 0fe3b5e3..b9384799 100644
--- a/rsvg_internals/src/path_parser.rs
+++ b/rsvg_internals/src/path_parser.rs
@@ -4,14 +4,179 @@ use std::error::Error;
use std::fmt;
use std::iter::Enumerate;
use std::str;
-use std::str::Chars;
+use std::str::Bytes;
use crate::path_builder::*;
+#[derive(Debug, PartialEq, Copy, Clone)]
+pub enum Token {
+ // pub to allow benchmarking
+ Number(f64),
+ Command(u8),
+ Comma,
+}
+
+use crate::path_parser::Token::{Comma, Command, Number};
+
+#[derive(Debug)]
+pub struct Lexer<'a> {
+ // pub to allow benchmarking
+ input: &'a [u8],
+ ci: Enumerate<Bytes<'a>>,
+ current: Option<(usize, u8)>,
+}
+
+#[derive(Debug, PartialEq, Copy, Clone)]
+pub enum LexError {
+ // pub to allow benchmarking
+ ParseFloatError,
+ ParseIntError,
+ UnexpectedByte(u8),
+ UnexpectedEof,
+}
+
+impl<'a> Lexer<'_> {
+ pub fn new(input: &'a str) -> Lexer {
+ let mut ci = input.bytes().enumerate();
+ let current = ci.next();
+ Lexer {
+ input: input.as_bytes(),
+ ci: ci,
+ current: current,
+ }
+ }
+
+ fn current_pos(&mut self) -> usize {
+ match self.current {
+ None => self.input.len(),
+ Some((pos, _)) => pos,
+ }
+ }
+
+ fn advance(&mut self) {
+ self.current = self.ci.next();
+ }
+
+ fn advance_over_whitespace(&mut self) -> bool {
+ let mut found_some = false;
+
+ while self.current.is_some() && self.current.unwrap().1.is_ascii_whitespace() {
+ found_some = true;
+ self.current = self.ci.next();
+ }
+
+ return found_some;
+ }
+
+ fn advance_over_optional(&mut self, needle: u8) -> bool {
+ match self.current {
+ Some((_, c)) if c == needle => {
+ self.advance();
+ return true;
+ }
+ _ => return false,
+ }
+ }
+
+ fn advance_over_digits(&mut self) -> bool {
+ let mut found_some = false;
+ while self.current.is_some() && self.current.unwrap().1.is_ascii_digit() {
+ found_some = true;
+ self.current = self.ci.next();
+ }
+
+ return found_some;
+ }
+
+ fn advance_over_simple_number(&mut self, is_float: &mut bool) -> bool {
+ let _ = self.advance_over_optional(b'-') || self.advance_over_optional(b'+');
+ let found_digit = self.advance_over_digits();
+ if self.advance_over_optional(b'.') {
+ *is_float = true;
+ }
+ self.advance_over_digits() || found_digit
+ }
+
+ fn match_number(&mut self) -> Result<Token, LexError> {
+ // remember the beginning
+ let (start_pos, _) = self.current.unwrap();
+ let mut is_float = false;
+ if !self.advance_over_simple_number(&mut is_float) && start_pos != self.current_pos() {
+ match self.current {
+ None => return Err(LexError::UnexpectedEof),
+ Some((_pos, c)) => return Err(LexError::UnexpectedByte(c)),
+ }
+ }
+ if self.advance_over_optional(b'e') || self.advance_over_optional(b'E') {
+ is_float = true;
+ self.advance_over_simple_number(&mut is_float);
+ }
+ let end_pos = match self.current {
+ None => self.input.len(),
+ Some((i, _)) => i,
+ };
+ unsafe {
+ // This is OK because we have personally checked every character of
+ // the slice we're about to use as a utf8 string so we know it's all legal.
+ let num_str = std::str::from_utf8_unchecked(&self.input[start_pos..end_pos]);
+
+ // using std::str::parse::<f64> causes quite a hit relative to
+ // the old mixed-lexing-parsing code which did it's own number parsing.
+ // Using parse::<i32> is, annoyingly, about 3-4x faster than the parse::<i64>
+ // call. So do that if we don't have a '.' or a 'e' in the number.
+ if is_float {
+ match num_str.parse::<f64>() {
+ Ok(n) => Ok(Number(n)),
+ Err(_e) => Err(LexError::ParseFloatError),
+ }
+ } else {
+ match num_str.parse::<i32>() {
+ Ok(n) => Ok(Number(n as f64)),
+ Err(_e) => Err(LexError::ParseIntError),
+ }
+ }
+ }
+ }
+}
+
+impl Iterator for Lexer<'_> {
+ type Item = (usize, Result<Token, LexError>);
+
+ fn next(&mut self) -> Option<Self::Item> {
+ // eat whitespace
+ self.advance_over_whitespace();
+
+ match self.current {
+ // commas are separators
+ Some((pos, c)) if c == b',' => {
+ self.advance();
+ Some((pos, Ok(Comma)))
+ }
+
+ // alphabetic chars are commands
+ Some((pos, c)) if c.is_ascii_alphabetic() => {
+ let token = Command(c);
+ self.advance();
+ Some((pos, Ok(token)))
+ }
+
+ Some((pos, c)) if c.is_ascii_digit() || c == b'-' || c == b'+' || c == b'.' => {
+ Some((pos, self.match_number()))
+ }
+
+ Some((pos, c)) => {
+ self.advance();
+ Some((pos, Err(LexError::UnexpectedByte(c))))
+ }
+
+ None => None,
+ }
+ }
+}
+
struct PathParser<'b> {
- chars_enumerator: Enumerate<Chars<'b>>,
- lookahead: Option<char>, // None if we are in EOF
- current_pos: Option<usize>, // None if the string hasn't been scanned
+ tokens: Lexer<'b>,
+ current_pos_and_token: Option<(usize, Result<Token, LexError>)>,
builder: &'b mut PathBuilder,
@@ -60,10 +225,11 @@ struct PathParser<'b> {
// M 0.1 -2 300 -4
impl<'b> PathParser<'b> {
fn new(builder: &'b mut PathBuilder, path_str: &'b str) -> PathParser<'b> {
+ let mut lexer = Lexer::new(path_str);
+ let pt = lexer.next();
PathParser {
- chars_enumerator: path_str.chars().enumerate(),
- lookahead: None,
- current_pos: None,
+ tokens: lexer,
+ current_pos_and_token: pt,
builder,
@@ -81,216 +247,114 @@ impl<'b> PathParser<'b> {
}
}
- fn parse(&mut self) -> Result<(), ParseError> {
- self.getchar();
+ // Our match_* methods all either consume the token we requested
+ // and return the unwrapped value, or return an error without
+ // advancing the token stream.
+ //
+ // You can safely use them to probe for a particular kind of token,
+ // fail to match it, and try some other type.
- self.optional_whitespace()?;
- self.moveto_drawto_command_groups()
- }
-
- fn getchar(&mut self) {
- if let Some((pos, c)) = self.chars_enumerator.next() {
- self.lookahead = Some(c);
- self.current_pos = Some(pos);
- } else {
- // We got to EOF; make current_pos point to the position after the last char in the
- // string
- self.lookahead = None;
- if self.current_pos.is_none() {
- self.current_pos = Some(0);
- } else {
- self.current_pos = Some(self.current_pos.unwrap() + 1);
- }
+ fn match_command(&mut self) -> Result<u8, ParseError> {
+ let result = match &self.current_pos_and_token {
+ Some((_, Ok(Command(c)))) => Ok(*c),
+ Some((pos, Ok(t))) => Err(ParseError::new(*pos, UnexpectedToken(*t))),
+ Some((pos, Err(e))) => Err(ParseError::new(*pos, LexError(*e))),
+ None => Err(ParseError::new(self.tokens.input.len(), UnexpectedEof)),
+ };
+ if result.is_ok() {
+ self.current_pos_and_token = self.tokens.next();
}
+ result
}
- fn error(&self, kind: ErrorKind) -> ParseError {
- ParseError {
- position: self.current_pos.unwrap(),
- kind,
+ fn match_number(&mut self) -> Result<f64, ParseError> {
+ let result = match &self.current_pos_and_token {
+ Some((_, Ok(Number(n)))) => Ok(*n),
+ Some((pos, Ok(t))) => Err(ParseError::new(*pos, UnexpectedToken(*t))),
+ Some((pos, Err(e))) => Err(ParseError::new(*pos, LexError(*e))),
+ None => Err(ParseError::new(self.tokens.input.len(), UnexpectedEof)),
+ };
+ if result.is_ok() {
+ self.current_pos_and_token = self.tokens.next();
}
+ result
}
- fn match_char(&mut self, c: char) -> bool {
- if let Some(x) = self.lookahead {
- if c == x {
- self.getchar();
- return true;
- }
+ fn match_comma(&mut self) -> Result<(), ParseError> {
+ let result = match &self.current_pos_and_token {
+ Some((_, Ok(Comma))) => Ok(()),
+ Some((pos, Ok(t))) => Err(ParseError::new(*pos, UnexpectedToken(*t))),
+ Some((pos, Err(e))) => Err(ParseError::new(*pos, LexError(*e))),
+ None => Err(ParseError::new(self.tokens.input.len(), UnexpectedEof)),
+ };
+ if result.is_ok() {
+ self.current_pos_and_token = self.tokens.next();
}
-
- false
+ result
}
- fn whitespace(&mut self) -> Result<(), ParseError> {
- if let Some(c) = self.lookahead {
- if c.is_whitespace() {
- assert!(self.match_char(c));
-
- while let Some(c) = self.lookahead {
- if c.is_whitespace() {
- assert!(self.match_char(c));
- continue;
- } else {
- break;
- }
- }
- }
- }
-
- Ok(())
+ fn eat_optional_comma(&mut self) {
+ let _ = self.match_comma();
}
- fn optional_whitespace(&mut self) -> Result<(), ParseError> {
- let _ = self.whitespace();
- Ok(())
+ // Convenience function; like match_number, but eats a leading comma if present.
+ fn match_comma_number(&mut self) -> Result<f64, ParseError> {
+ self.eat_optional_comma();
+ self.match_number()
}
- fn optional_comma_whitespace(&mut self) -> Result<(), ParseError> {
- self.optional_whitespace()?;
- if self.lookahead_is(',') {
- self.match_char(',');
- self.optional_whitespace()?;
+ fn match_flag(&mut self) -> Result<bool, ParseError> {
+ let pos = match self.current_pos_and_token {
+ Some((pos, _)) => pos,
+ _ => self.tokens.input.len(),
+ };
+ match self.match_number()? {
+ n if n == 1.0 => Ok(true),
+ n if n == 0.0 => Ok(false),
+ n => Err(ParseError::new(pos, ErrorKind::UnexpectedToken(Number(n)))),
}
- Ok(())
}
- fn lookahead_is(&self, c: char) -> bool {
- if let Some(x) = self.lookahead {
- if x == c {
- return true;
- }
- }
+ // peek_* methods are the twins of match_*, but don't consume the token, and so
+ // can't return ParseError
- false
- }
-
- fn lookahead_is_digit(&self, d: &mut char) -> bool {
- if let Some(c) = self.lookahead {
- if c.is_digit(10) {
- *d = c;
- return true;
- }
+ fn peek_command(&mut self) -> Option<u8> {
+ match &self.current_pos_and_token {
+ Some((_, Ok(Command(c)))) => Some(*c),
+ _ => None,
}
-
- false
}
- fn lookahead_is_start_of_number(&mut self) -> bool {
- let mut c = ' ';
- self.lookahead_is_digit(&mut c)
- || self.lookahead_is('.')
- || self.lookahead_is('+')
- || self.lookahead_is('-')
+ fn peek_number(&mut self) -> Option<f64> {
+ match &self.current_pos_and_token {
+ Some((_, Ok(Number(n)))) => Some(*n),
+ _ => None,
+ }
}
- fn number(&mut self) -> Result<f64, ParseError> {
- let mut sign: f64;
-
- sign = 1.0;
-
- if self.match_char('+') {
- sign = 1.0;
- } else if self.match_char('-') {
- sign = -1.0;
+ // This is the entry point for parsing a given blob of path data.
+ // All the parsing just uses various match_* methods to consume tokens
+ // and retrieve the values.
+ fn parse(&mut self) -> Result<(), ParseError> {
+ if self.current_pos_and_token.is_none() {
+ return Ok(());
}
- let mut has_integer_part = false;
- let mut value: f64;
- let mut exponent_sign: f64;
- let mut exponent: Option<f64>;
-
- value = 0.0;
- exponent_sign = 1.0;
- exponent = None;
-
- let mut c: char = ' ';
-
- if self.lookahead_is_digit(&mut c) || self.lookahead_is('.') {
- // Integer part
- while self.lookahead_is_digit(&mut c) {
- has_integer_part = true;
- value = value * 10.0 + f64::from(char_to_digit(c));
-
- assert!(self.match_char(c));
- }
-
- // Fractional part
- if self.match_char('.') {
- let mut fraction: f64 = 1.0;
-
- let mut c: char = ' ';
-
- if !has_integer_part && !self.lookahead_is_digit(&mut c) {
- return Err(self.error(ErrorKind::UnexpectedToken));
- }
-
- while self.lookahead_is_digit(&mut c) {
- fraction /= 10.0;
- value += fraction * f64::from(char_to_digit(c));
-
- assert!(self.match_char(c));
- }
- }
-
- if self.match_char('E') || self.match_char('e') {
- // exponent sign
- if self.match_char('+') {
- exponent_sign = 1.0;
- } else if self.match_char('-') {
- exponent_sign = -1.0;
- }
-
- // exponent
- let mut c: char = ' ';
-
- if self.lookahead_is_digit(&mut c) {
- let mut exp = 0.0;
-
- while self.lookahead_is_digit(&mut c) {
- exp = exp * 10.0 + f64::from(char_to_digit(c));
-
- assert!(self.match_char(c));
- }
-
- exponent = Some(exp);
- } else if self.lookahead.is_some() {
- return Err(self.error(ErrorKind::UnexpectedToken));
- } else {
- return Err(self.error(ErrorKind::UnexpectedEof));
- }
- }
-
- if let Some(exp) = exponent {
- Ok(sign * value * 10.0f64.powf(exp * exponent_sign))
- } else {
- Ok(sign * value)
- }
- } else if self.lookahead.is_some() {
- Err(self.error(ErrorKind::UnexpectedToken))
- } else {
- Err(self.error(ErrorKind::UnexpectedEof))
- }
+ self.moveto_drawto_command_groups()
}
- fn flag(&mut self) -> Result<bool, ParseError> {
- if self.match_char('0') {
- Ok(false)
- } else if self.match_char('1') {
- Ok(true)
- } else if self.lookahead.is_some() {
- Err(self.error(ErrorKind::UnexpectedToken))
- } else {
- Err(self.error(ErrorKind::UnexpectedEof))
+ fn error(&self, kind: ErrorKind) -> ParseError {
+ match self.current_pos_and_token {
+ Some((pos, _)) => ParseError {
+ position: pos,
+ kind,
+ },
+ None => ParseError { position: 0, kind }, // FIXME: ???
}
}
fn coordinate_pair(&mut self) -> Result<(f64, f64), ParseError> {
- let a = self.number()?;
- self.optional_comma_whitespace()?;
- let b = self.number()?;
-
- Ok((a, b))
+ Ok((self.match_number()?, self.match_comma_number()?))
}
fn set_current_point(&mut self, x: f64, y: f64) {
@@ -388,37 +452,6 @@ impl<'b> PathParser<'b> {
);
}
- fn emit_close_path(&mut self) {
- let (x, y) = (self.subpath_start_x, self.subpath_start_y);
- self.set_current_point(x, y);
-
- self.builder.close_path();
- }
-
- fn lineto_argument_sequence(&mut self, absolute: bool) -> Result<(), ParseError> {
- loop {
- let (mut x, mut y) = self.coordinate_pair()?;
-
- if !absolute {
- x += self.current_x;
- y += self.current_y;
- }
-
- self.emit_line_to(x, y);
-
- self.whitespace()?;
-
- if self.lookahead_is(',') {
- assert!(self.match_char(','));
- self.optional_whitespace()?;
- } else if !self.lookahead_is_start_of_number() {
- break;
- }
- }
-
- Ok(())
- }
-
fn moveto_argument_sequence(
&mut self,
absolute: bool,
@@ -437,13 +470,7 @@ impl<'b> PathParser<'b> {
self.emit_move_to(x, y);
}
- self.whitespace()?;
-
- if self.lookahead_is(',') {
- assert!(self.match_char(','));
- self.optional_whitespace()?;
- self.lineto_argument_sequence(absolute)
- } else if self.lookahead_is_start_of_number() {
+ if self.match_comma().is_ok() || self.peek_number().is_some() {
self.lineto_argument_sequence(absolute)
} else {
Ok(())
@@ -451,27 +478,15 @@ impl<'b> PathParser<'b> {
}
fn moveto(&mut self, is_initial_moveto: bool) -> Result<(), ParseError> {
- if self.lookahead_is('M') || self.lookahead_is('m') {
- let absolute = if self.match_char('M') {
- true
- } else {
- assert!(self.match_char('m'));
- false
- };
-
- self.optional_whitespace()?;
- self.moveto_argument_sequence(absolute, is_initial_moveto)
- } else if self.lookahead.is_some() {
- Err(self.error(ErrorKind::UnexpectedToken))
- } else {
- Err(self.error(ErrorKind::UnexpectedEof))
+ match self.match_command()? {
+ b'M' => self.moveto_argument_sequence(true, is_initial_moveto),
+ b'm' => self.moveto_argument_sequence(false, is_initial_moveto),
+ c => Err(self.error(ErrorKind::UnexpectedCommand(c))),
}
}
fn moveto_drawto_command_group(&mut self, is_initial_moveto: bool) -> Result<(), ParseError> {
self.moveto(is_initial_moveto)?;
- self.optional_whitespace()?;
-
self.optional_drawto_commands().map(|_| ())
}
@@ -482,8 +497,7 @@ impl<'b> PathParser<'b> {
self.moveto_drawto_command_group(initial)?;
initial = false;
- self.optional_whitespace()?;
- if self.lookahead.is_none() {
+ if self.current_pos_and_token.is_none() {
break;
}
}
@@ -493,69 +507,110 @@ impl<'b> PathParser<'b> {
fn optional_drawto_commands(&mut self) -> Result<bool, ParseError> {
while self.drawto_command()? {
- self.optional_whitespace()?;
+ // everything happens in the drawto_command() calls.
}
Ok(false)
}
+ // FIXME: This should not just fail to match 'M' and 'm', but make sure the
+ // command is in the set of drawto command characters.
+ fn match_if_drawto_command_with_absolute(&mut self) -> Option<(u8, bool)> {
+ let cmd = self.peek_command();
+ let result = match cmd {
+ Some(b'M') => None,
+ Some(b'm') => None,
+ Some(c) => {
+ let c_up = c.to_ascii_uppercase();
+ if c == c_up {
+ Some((c_up, true))
+ } else {
+ Some((c_up, false))
+ }
+ }
+ _ => None,
+ };
+ if result.is_some() {
+ let _ = self.match_command();
+ }
+ result
+ }
+
fn drawto_command(&mut self) -> Result<bool, ParseError> {
- Ok(self.close_path()?
- || self.line_to()?
- || self.horizontal_line_to()?
- || self.vertical_line_to()?
- || self.curve_to()?
- || self.smooth_curve_to()?
- || self.quadratic_bezier_curve_to()?
- || self.smooth_quadratic_bezier_curve_to()?
- || self.elliptical_arc()?)
- }
-
- fn close_path(&mut self) -> Result<bool, ParseError> {
- if self.match_char('Z') || self.match_char('z') {
- self.emit_close_path();
- Ok(true)
- } else {
- Ok(false)
+ match self.match_if_drawto_command_with_absolute() {
+ Some((b'Z', _)) => {
+ self.emit_close_path();
+ Ok(true)
+ }
+ Some((b'L', abs)) => {
+ self.lineto_argument_sequence(abs)?;
+ Ok(true)
+ }
+ Some((b'H', abs)) => {
+ self.horizontal_lineto_argument_sequence(abs)?;
+ Ok(true)
+ }
+ Some((b'V', abs)) => {
+ self.vertical_lineto_argument_sequence(abs)?;
+ Ok(true)
+ }
+ Some((b'C', abs)) => {
+ self.curveto_argument_sequence(abs)?;
+ Ok(true)
+ }
+ Some((b'S', abs)) => {
+ self.smooth_curveto_argument_sequence(abs)?;
+ Ok(true)
+ }
+ Some((b'Q', abs)) => {
+ self.quadratic_curveto_argument_sequence(abs)?;
+ Ok(true)
+ }
+ Some((b'T', abs)) => {
+ self.smooth_quadratic_curveto_argument_sequence(abs)?;
+ Ok(true)
+ }
+ Some((b'A', abs)) => {
+ self.elliptical_arc_argument_sequence(abs)?;
+ Ok(true)
+ }
+ _ => Ok(false),
}
}
- fn line_to(&mut self) -> Result<bool, ParseError> {
- if self.lookahead_is('L') || self.lookahead_is('l') {
- let absolute = if self.match_char('L') {
- true
- } else {
- assert!(self.match_char('l'));
- false
- };
+ fn emit_close_path(&mut self) {
+ let (x, y) = (self.subpath_start_x, self.subpath_start_y);
+ self.set_current_point(x, y);
- self.optional_whitespace()?;
+ self.builder.close_path();
+ }
- self.lineto_argument_sequence(absolute)?;
- Ok(true)
+ fn should_break_arg_sequence(&mut self) -> bool {
+ if self.match_comma().is_ok() {
+ // if there is a comma (indicating we should continue to loop), eat the comma
+ // so we're ready at the next start of the loop to process the next token.
+ false
+ } else if !self.peek_number().is_some() {
+ // if the next token is not a number or a comma, we do want to break the loop
+ true
} else {
- Ok(false)
+ // otherwise, continue to loop to process args in the sequence.
+ false
}
}
- fn horizontal_lineto_argument_sequence(&mut self, absolute: bool) -> Result<(), ParseError> {
+ fn lineto_argument_sequence(&mut self, absolute: bool) -> Result<(), ParseError> {
loop {
- let mut x = self.number()?;
+ let (mut x, mut y) = self.coordinate_pair()?;
if !absolute {
x += self.current_x;
+ y += self.current_y;
}
- let y = self.current_y;
-
self.emit_line_to(x, y);
- self.whitespace()?;
-
- if self.lookahead_is(',') {
- assert!(self.match_char(','));
- self.optional_whitespace()?;
- } else if !self.lookahead_is_start_of_number() {
+ if self.should_break_arg_sequence() {
break;
}
}
@@ -563,27 +618,29 @@ impl<'b> PathParser<'b> {
Ok(())
}
- fn horizontal_line_to(&mut self) -> Result<bool, ParseError> {
- if self.lookahead_is('H') || self.lookahead_is('h') {
- let absolute = if self.match_char('H') {
- true
- } else {
- assert!(self.match_char('h'));
- false
- };
+ fn horizontal_lineto_argument_sequence(&mut self, absolute: bool) -> Result<(), ParseError> {
+ loop {
+ let mut x = self.match_number()?;
- self.optional_whitespace()?;
+ if !absolute {
+ x += self.current_x;
+ }
- self.horizontal_lineto_argument_sequence(absolute)?;
- Ok(true)
- } else {
- Ok(false)
+ let y = self.current_y;
+
+ self.emit_line_to(x, y);
+
+ if self.should_break_arg_sequence() {
+ break;
+ }
}
+
+ Ok(())
}
fn vertical_lineto_argument_sequence(&mut self, absolute: bool) -> Result<(), ParseError> {
loop {
- let mut y = self.number()?;
+ let mut y = self.match_number()?;
if !absolute {
y += self.current_y;
@@ -593,12 +650,7 @@ impl<'b> PathParser<'b> {
self.emit_line_to(x, y);
- self.whitespace()?;
-
- if self.lookahead_is(',') {
- assert!(self.match_char(','));
- self.optional_whitespace()?;
- } else if !self.lookahead_is_start_of_number() {
+ if self.should_break_arg_sequence() {
break;
}
}
@@ -606,34 +658,14 @@ impl<'b> PathParser<'b> {
Ok(())
}
- fn vertical_line_to(&mut self) -> Result<bool, ParseError> {
- if self.lookahead_is('V') || self.lookahead_is('v') {
- let absolute = if self.match_char('V') {
- true
- } else {
- assert!(self.match_char('v'));
- false
- };
-
- self.optional_whitespace()?;
-
- self.vertical_lineto_argument_sequence(absolute)?;
- Ok(true)
- } else {
- Ok(false)
- }
- }
-
fn curveto_argument_sequence(&mut self, absolute: bool) -> Result<(), ParseError> {
loop {
let (mut x2, mut y2) = self.coordinate_pair()?;
- self.optional_comma_whitespace()?;
-
+ self.eat_optional_comma();
let (mut x3, mut y3) = self.coordinate_pair()?;
- self.optional_comma_whitespace()?;
-
+ self.eat_optional_comma();
let (mut x4, mut y4) = self.coordinate_pair()?;
if !absolute {
@@ -647,12 +679,7 @@ impl<'b> PathParser<'b> {
self.emit_curve_to(x2, y2, x3, y3, x4, y4);
- self.whitespace()?;
-
- if self.lookahead_is(',') {
- assert!(self.match_char(','));
- self.optional_whitespace()?;
- } else if !self.lookahead_is_start_of_number() {
+ if self.should_break_arg_sequence() {
break;
}
}
@@ -663,9 +690,7 @@ impl<'b> PathParser<'b> {
fn smooth_curveto_argument_sequence(&mut self, absolute: bool) -> Result<(), ParseError> {
loop {
let (mut x3, mut y3) = self.coordinate_pair()?;
-
- self.optional_comma_whitespace()?;
-
+ self.eat_optional_comma();
let (mut x4, mut y4) = self.coordinate_pair()?;
if !absolute {
@@ -682,12 +707,7 @@ impl<'b> PathParser<'b> {
self.emit_curve_to(x2, y2, x3, y3, x4, y4);
- self.whitespace()?;
-
- if self.lookahead_is(',') {
- assert!(self.match_char(','));
- self.optional_whitespace()?;
- } else if !self.lookahead_is_start_of_number() {
+ if self.should_break_arg_sequence() {
break;
}
}
@@ -695,48 +715,10 @@ impl<'b> PathParser<'b> {
Ok(())
}
- fn curve_to(&mut self) -> Result<bool, ParseError> {
- if self.lookahead_is('C') || self.lookahead_is('c') {
- let absolute = if self.match_char('C') {
- true
- } else {
- assert!(self.match_char('c'));
- false
- };
-
- self.optional_whitespace()?;
-
- self.curveto_argument_sequence(absolute)?;
- Ok(true)
- } else {
- Ok(false)
- }
- }
-
- fn smooth_curve_to(&mut self) -> Result<bool, ParseError> {
- if self.lookahead_is('S') || self.lookahead_is('s') {
- let absolute = if self.match_char('S') {
- true
- } else {
- assert!(self.match_char('s'));
- false
- };
-
- self.optional_whitespace()?;
-
- self.smooth_curveto_argument_sequence(absolute)?;
- Ok(true)
- } else {
- Ok(false)
- }
- }
-
fn quadratic_curveto_argument_sequence(&mut self, absolute: bool) -> Result<(), ParseError> {
loop {
let (mut a, mut b) = self.coordinate_pair()?;
-
- self.optional_comma_whitespace()?;
-
+ self.eat_optional_comma();
let (mut c, mut d) = self.coordinate_pair()?;
if !absolute {
@@ -748,12 +730,7 @@ impl<'b> PathParser<'b> {
self.emit_quadratic_curve_to(a, b, c, d);
- self.whitespace()?;
-
- if self.lookahead_is(',') {
- assert!(self.match_char(','));
- self.optional_whitespace()?;
- } else if !self.lookahead_is_start_of_number() {
+ if self.should_break_arg_sequence() {
break;
}
}
@@ -761,24 +738,6 @@ impl<'b> PathParser<'b> {
Ok(())
}
- fn quadratic_bezier_curve_to(&mut self) -> Result<bool, ParseError> {
- if self.lookahead_is('Q') || self.lookahead_is('q') {
- let absolute = if self.match_char('Q') {
- true
- } else {
- assert!(self.match_char('q'));
- false
- };
-
- self.optional_whitespace()?;
-
- self.quadratic_curveto_argument_sequence(absolute)?;
- Ok(true)
- } else {
- Ok(false)
- }
- }
-
fn smooth_quadratic_curveto_argument_sequence(
&mut self,
absolute: bool,
@@ -798,12 +757,7 @@ impl<'b> PathParser<'b> {
self.emit_quadratic_curve_to(a, b, c, d);
- self.whitespace()?;
-
- if self.lookahead_is(',') {
- assert!(self.match_char(','));
- self.optional_whitespace()?;
- } else if !self.lookahead_is_start_of_number() {
+ if self.should_break_arg_sequence() {
break;
}
}
@@ -811,49 +765,25 @@ impl<'b> PathParser<'b> {
Ok(())
}
- fn smooth_quadratic_bezier_curve_to(&mut self) -> Result<bool, ParseError> {
- if self.lookahead_is('T') || self.lookahead_is('t') {
- let absolute = if self.match_char('T') {
- true
- } else {
- assert!(self.match_char('t'));
- false
- };
-
- self.optional_whitespace()?;
-
- self.smooth_quadratic_curveto_argument_sequence(absolute)?;
- Ok(true)
- } else {
- Ok(false)
- }
- }
-
fn elliptical_arc_argument_sequence(&mut self, absolute: bool) -> Result<(), ParseError> {
loop {
- let rx = self.number()?.abs();
+ let rx = self.match_number()?.abs();
+ let ry = self.match_comma_number()?.abs();
+ let x_axis_rotation = self.match_comma_number()?;
- self.optional_comma_whitespace()?;
+ self.eat_optional_comma();
- let ry = self.number()?.abs();
+ let large_arc = LargeArc(self.match_flag()?);
- self.optional_comma_whitespace()?;
+ self.eat_optional_comma();
- let x_axis_rotation = self.number()?;
-
- self.optional_comma_whitespace()?;
-
- let large_arc = LargeArc(self.flag()?);
-
- self.optional_comma_whitespace()?;
-
- let sweep = if self.flag()? {
+ let sweep = if self.match_flag()? {
Sweep::Positive
} else {
Sweep::Negative
};
- self.optional_comma_whitespace()?;
+ self.eat_optional_comma();
let (mut x, mut y) = self.coordinate_pair()?;
@@ -864,46 +794,21 @@ impl<'b> PathParser<'b> {
self.emit_arc(rx, ry, x_axis_rotation, large_arc, sweep, x, y);
- self.whitespace()?;
-
- if self.lookahead_is(',') {
- assert!(self.match_char(','));
- self.optional_whitespace()?;
- } else if !self.lookahead_is_start_of_number() {
+ if self.should_break_arg_sequence() {
break;
}
}
Ok(())
}
-
- fn elliptical_arc(&mut self) -> Result<bool, ParseError> {
- if self.lookahead_is('A') || self.lookahead_is('a') {
- let absolute = if self.match_char('A') {
- true
- } else {
- assert!(self.match_char('a'));
- false
- };
-
- self.optional_whitespace()?;
-
- self.elliptical_arc_argument_sequence(absolute)?;
- Ok(true)
- } else {
- Ok(false)
- }
- }
-}
-
-fn char_to_digit(c: char) -> i32 {
- c as i32 - '0' as i32
}
#[derive(Debug, PartialEq)]
pub enum ErrorKind {
- UnexpectedToken,
+ UnexpectedToken(Token),
+ UnexpectedCommand(u8),
UnexpectedEof,
+ LexError(LexError),
}
#[derive(Debug, PartialEq)]
@@ -914,11 +819,24 @@ pub struct ParseError {
impl Error for ParseError {}
+impl ParseError {
+ fn new(pos: usize, k: ErrorKind) -> ParseError {
+ ParseError {
+ position: pos,
+ kind: k,
+ }
+ }
+}
+
+use crate::path_parser::ErrorKind::*;
+
impl fmt::Display for ParseError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let description = match self.kind {
- ErrorKind::UnexpectedToken => "unexpected token",
- ErrorKind::UnexpectedEof => "unexpected end of data",
+ UnexpectedToken(_t) => "unexpected token",
+ UnexpectedCommand(_c) => "unexpected command",
+ UnexpectedEof => "unexpected end of data",
+ LexError(_le) => "error processing token",
};
write!(f, "error at position {}: {}", self.position, description)
}
@@ -999,9 +917,9 @@ mod tests {
fn handles_empty_data() {
test_parser(
"",
- "^",
+ "",
&Vec::<PathCommand>::new(),
- Some(ErrorKind::UnexpectedEof),
+ None,
);
}
@@ -1089,51 +1007,51 @@ mod tests {
fn detects_bogus_numbers() {
test_parser(
"M+",
- " ^",
+ " ^",
&vec![],
- Some(ErrorKind::UnexpectedEof),
+ Some(ErrorKind::LexError(LexError::UnexpectedEof)),
);
test_parser(
"M-",
- " ^",
+ " ^",
&vec![],
- Some(ErrorKind::UnexpectedEof),
+ Some(ErrorKind::LexError(LexError::UnexpectedEof)),
);
test_parser(
"M+x",
- " ^",
+ " ^",
&vec![],
- Some(ErrorKind::UnexpectedToken),
+ Some(ErrorKind::LexError(LexError::UnexpectedByte(b'x'))),
);
test_parser(
"M10e",
- " ^",
+ " ^",
&vec![],
- Some(ErrorKind::UnexpectedEof),
+ Some(ErrorKind::LexError(LexError::ParseFloatError)),
);
test_parser(
"M10ex",
- " ^",
+ " ^",
&vec![],
- Some(ErrorKind::UnexpectedToken),
+ Some(ErrorKind::LexError(LexError::ParseFloatError)),
);
test_parser(
"M10e-",
- " ^",
+ " ^",
&vec![],
- Some(ErrorKind::UnexpectedEof),
+ Some(ErrorKind::LexError(LexError::ParseFloatError)),
);
test_parser(
"M10e+x",
- " ^",
+ " ^",
&vec![],
- Some(ErrorKind::UnexpectedToken),
+ Some(ErrorKind::LexError(LexError::ParseFloatError)),
);
}
@@ -1676,9 +1594,9 @@ mod tests {
fn first_command_must_be_moveto() {
test_parser(
" L10 20",
- " ^",
+ " ^", // FIXME: why is this not at position 2?
&vec![],
- Some(ErrorKind::UnexpectedToken),
+ Some(ErrorKind::UnexpectedCommand(b'L')),
);
}
@@ -1695,7 +1613,7 @@ mod tests {
"M,",
" ^",
&vec![],
- Some(ErrorKind::UnexpectedToken),
+ Some(ErrorKind::UnexpectedToken(Comma)),
);
test_parser(
@@ -1716,14 +1634,14 @@ mod tests {
"M10x",
" ^",
&vec![],
- Some(ErrorKind::UnexpectedToken),
+ Some(ErrorKind::UnexpectedToken(Command(b'x'))),
);
test_parser(
"M10,x",
" ^",
&vec![],
- Some(ErrorKind::UnexpectedToken),
+ Some(ErrorKind::UnexpectedToken(Command(b'x'))),
);
}
@@ -1747,7 +1665,7 @@ mod tests {
"M10-20-30 x",
" ^",
&vec![moveto(10.0, -20.0)],
- Some(ErrorKind::UnexpectedToken),
+ Some(ErrorKind::UnexpectedToken(Command(b'x'))),
);
}
@@ -1757,14 +1675,14 @@ mod tests {
"M10-20z10",
" ^",
&vec![moveto(10.0, -20.0), closepath()],
- Some(ErrorKind::UnexpectedToken),
+ Some(ErrorKind::UnexpectedToken(Number(10.0))),
);
test_parser(
"M10-20z,",
" ^",
&vec![moveto(10.0, -20.0), closepath()],
- Some(ErrorKind::UnexpectedToken),
+ Some(ErrorKind::UnexpectedToken(Comma)),
);
}
@@ -1805,7 +1723,7 @@ mod tests {
"M10-20H,",
" ^",
&vec![moveto(10.0, -20.0)],
- Some(ErrorKind::UnexpectedToken),
+ Some(ErrorKind::UnexpectedToken(Comma)),
);
test_parser(
@@ -1829,7 +1747,7 @@ mod tests {
"M10-20v,",
" ^",
&vec![moveto(10.0, -20.0)],
- Some(ErrorKind::UnexpectedToken),
+ Some(ErrorKind::UnexpectedToken(Comma)),
);
test_parser(
@@ -2127,7 +2045,7 @@ mod tests {
"M10-20A1 2 3 4",
" ^",
&vec![moveto(10.0, -20.0)],
- Some(ErrorKind::UnexpectedToken),
+ Some(ErrorKind::UnexpectedToken(Number(4.0))),
);
test_parser(
@@ -2147,7 +2065,7 @@ mod tests {
"M10-20A1 2 3 1 5",
" ^",
&vec![moveto(10.0, -20.0)],
- Some(ErrorKind::UnexpectedToken),
+ Some(ErrorKind::UnexpectedToken(Number(5.0))),
);
test_parser(
@@ -2190,9 +2108,9 @@ mod tests {
// https://gitlab.gnome.org/GNOME/librsvg/issues/345
test_parser(
"M.. 1,0 0,100000",
- " ^",
+ " ^", // FIXME: we have to report position of error in lexer errors to make this right
&vec![],
- Some(ErrorKind::UnexpectedToken),
+ Some(ErrorKind::LexError(LexError::UnexpectedByte(b'.'))),
);
}
}
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]