feat(parser): parse double quoted strings

This commit is contained in:
buffet 2022-09-24 17:17:56 +00:00
parent c1881bb7b2
commit 78593f1667
48 changed files with 594 additions and 68 deletions

View file

@ -73,11 +73,11 @@ fn build_tree_pipeline<'a>(
ParseEvent::StartNode(NodeKind::Command) => {
children.push(build_tree_command(parser, source, pos)?)
}
ParseEvent::EndNode => break,
ParseEvent::NewLeaf(NodeKind::Pipe, _) => {
*pos += 1;
children.last_mut().unwrap().1 = Redirect::Stdout;
}
ParseEvent::EndNode => break,
ParseEvent::NewLeaf(_, len) => *pos += len,
ParseEvent::Error(err, _) => return Err(err),
_ => unreachable!(),
@ -116,9 +116,38 @@ fn build_tree_word<'a>(
) -> Result<Word<'a>, ParseError> {
let mut children = vec![];
while let Some(ev) = parser.next() {
match ev {
ParseEvent::NewLeaf(NodeKind::PlainText, len) => {
children.push(WordPart::Text(&source[*pos..*pos + len]));
*pos += len
}
ParseEvent::NewLeaf(NodeKind::EscapedChar, len) => {
children.push(WordPart::Text(&source[*pos + 1..*pos + len]));
*pos += len
}
ParseEvent::StartNode(NodeKind::DQuotedString) => {
build_tree_string(parser, source, pos, &mut children)?
}
ParseEvent::EndNode => break,
ParseEvent::NewLeaf(_, len) => *pos += len,
ParseEvent::Error(err, _) => return Err(err),
_ => unreachable!(),
}
}
Ok(Word(children))
}
fn build_tree_string<'a>(
parser: &mut Parser,
source: &'a str,
pos: &mut usize,
children: &mut Vec<WordPart<'a>>,
) -> Result<(), ParseError> {
for ev in parser {
match ev {
ParseEvent::NewLeaf(NodeKind::PlainWord, len) => {
ParseEvent::NewLeaf(NodeKind::PlainText, len) => {
children.push(WordPart::Text(&source[*pos..*pos + len]));
*pos += len
}
@ -127,10 +156,11 @@ fn build_tree_word<'a>(
*pos += len
}
ParseEvent::EndNode => break,
ParseEvent::NewLeaf(_, len) => *pos += len,
ParseEvent::Error(err, _) => return Err(err),
_ => unreachable!(),
}
}
Ok(Word(children))
Ok(())
}

View file

@ -12,8 +12,10 @@ pub enum TokenKind {
Semicolon,
/// A pipe.
Pipe,
/// A plain, unquoted word.
PlainWord,
/// Double quotes.
DoubleQuote,
/// Plain text. Either outside or inside of quotes.
PlainText,
/// A backslash followed by another character.
EscapedChar,
/// A line comment, from # to newline.
@ -101,6 +103,7 @@ impl Lexer<'_> {
let kind = match c {
';' => TokenKind::Semicolon,
'|' => TokenKind::Pipe,
'"' => TokenKind::DoubleQuote,
c if is_whitespace(c) => {
self.eat_while(is_whitespace);
@ -123,8 +126,37 @@ impl Lexer<'_> {
}
_ => {
self.eat_while(|c| ![' ', '\t', '\n', ';', '|', '\\'].contains(&c));
TokenKind::PlainWord
self.eat_while(|c| ![' ', '\t', '\n', ';', '|', '\\', '"'].contains(&c));
TokenKind::PlainText
}
};
let len = self.token_len();
Token { kind, len }
}
}
}
/// Get the next token in the string sublanguage.
pub fn next_string_token(&mut self) -> Token {
match self.next_char() {
None => Token {
kind: TokenKind::Eof,
len: 0,
},
Some(c) => {
let kind = match c {
'"' => TokenKind::DoubleQuote,
'\\' => {
self.next_char();
TokenKind::EscapedChar
}
_ => {
self.eat_while(|c| !['"', '\\'].contains(&c));
TokenKind::PlainText
}
};

View file

@ -8,7 +8,7 @@
//! _terminator ::= SEMICOLON | NEWLINES | EOF
//! pipeline ::= command (PIPE NEWLINES? command)*
//! command ::= word+
//! word ::= (PLAIN_WORD)+
//! word ::= (PLAIN_TEXT | ESCAPED_CHAR | DQUOTES (PLAIN_TEXT | ESCAPED_CHAR)* DQUOTES)+
//!
//! extras ::= COMMENT | WHITESPACE | BACKSLASH_N
//! ```

View file

@ -20,7 +20,8 @@ pub enum NodeKind {
Newlines,
Semicolon,
Pipe,
PlainWord,
DoubleQuote,
PlainText,
EscapedChar,
Comment,
@ -28,6 +29,7 @@ pub enum NodeKind {
Pipeline,
Command,
Word,
DQuotedString,
/// Read a pipe but didn't start word yet.
PipelineCont,
@ -118,23 +120,35 @@ impl Iterator for Parser<'_> {
}
macro_rules! leaf {
($type:expr) => {{
($type:expr $(, $mode:ident)?) => {{
use NodeKind::*;
let len = self.lookahead.len;
self.lookahead = self.lex.next_command_token();
self.lookahead = next_token!($($mode)?);
Some(ParseEvent::NewLeaf($type, len))
}};
}
macro_rules! error {
($type:expr) => {{
($type:expr $(, $mode:ident)?) => {{
use ParseError::*;
let len = self.lookahead.len;
self.lookahead = self.lex.next_command_token();
self.lookahead = next_token!($($mode)?);
Some(ParseEvent::Error($type, len))
}};
}
// Fetches the next token from the lexer, choosing the lexer mode from an
// optional mode identifier passed through by `leaf!` / `error!`.
macro_rules! next_token {
// No mode given: lex in command mode.
() => {
self.lex.next_command_token()
};
// Explicit `Command` mode; same lexer call as the no-argument form.
(Command) => {
self.lex.next_command_token()
};
// `String` mode: lex with the string sublanguage tokenizer.
(String) => {
self.lex.next_string_token()
};
}
use TokenKind::*;
if let Some(ev) = self.buffer.take() {
@ -149,7 +163,7 @@ impl Iterator for Parser<'_> {
Newlines => leaf!(Newlines),
Semicolon => leaf!(Semicolon),
Comment => leaf!(Comment),
PlainWord | EscapedChar => call!(Pipeline),
PlainText | DoubleQuote | EscapedChar => call!(Pipeline),
Pipe => error!(UnexpectedPipe),
Eof => chain!(None, ret!()), // return silently
},
@ -157,14 +171,14 @@ impl Iterator for Parser<'_> {
Whitespace => leaf!(Whitespace),
Comment => leaf!(Comment),
Pipe => chain!(leaf!(Pipe), call!(PipelineCont)),
PlainWord | EscapedChar => call!(Command),
PlainText | DoubleQuote | EscapedChar => call!(Command),
Newlines | Semicolon | Eof => ret!(),
},
NodeKind::PipelineCont => match self.lookahead.kind {
Whitespace => leaf!(Whitespace),
Newlines => leaf!(Newlines),
Comment => leaf!(Comment),
PlainWord | EscapedChar => tailcall!(Command),
PlainText | DoubleQuote | EscapedChar => tailcall!(Command),
Semicolon => chain_buf!(chain!(error!(UnexpectedSemicolon), ret!()), ret!()),
Pipe => chain!(error!(UnexpectedPipe), ret!()),
Eof => chain!(error!(UnexpectedEof), ret!()),
@ -172,15 +186,23 @@ impl Iterator for Parser<'_> {
NodeKind::Command => match self.lookahead.kind {
Whitespace => leaf!(Whitespace),
Comment => leaf!(Comment),
PlainWord | EscapedChar => call!(Word),
PlainText | DoubleQuote | EscapedChar => call!(Word),
Newlines | Semicolon | Eof => ret!(),
Pipe => ret!(),
},
NodeKind::Word => match self.lookahead.kind {
PlainWord => leaf!(PlainWord),
PlainText => leaf!(PlainText),
EscapedChar => leaf!(EscapedChar),
DoubleQuote => chain_buf!(call!(DQuotedString), leaf!(DoubleQuote, String)),
Comment | Whitespace | Newlines | Semicolon | Pipe | Eof => ret!(),
},
NodeKind::DQuotedString => match self.lookahead.kind {
PlainText => leaf!(PlainText, String),
EscapedChar => leaf!(EscapedChar, String),
DoubleQuote => chain_buf!(leaf!(DoubleQuote, Command), ret!()),
Eof => chain_buf!(error!(UnexpectedEof, Command), ret!()),
_ => unreachable!(),
},
_ => unreachable!(),
},
}

View file

@ -143,3 +143,30 @@ fn reject_double_pipe() {
assert_snapshot!(actual);
}
/// Snapshot test: runs `parse` on the input `"hello world"` (a complete
/// double-quoted string containing a space) and compares the result with the
/// stored snapshot.
#[test]
fn double_quote_string() {
// Raw string literal so the surrounding double quotes are part of the input.
let source = r#""hello world""#;
// `parse` is this test module's helper, defined outside this view.
let actual = parse(source);
assert_snapshot!(actual);
}
/// Snapshot test: runs `parse` on the input `"hello \" world"`, a
/// double-quoted string that contains a backslash-escaped double quote, and
/// compares the result with the stored snapshot.
#[test]
fn escaped_char_in_double_quotes() {
// Raw string literal: the backslash and all three quotes reach the parser verbatim.
let source = r#""hello \" world""#;
// `parse` is this test module's helper, defined outside this view.
let actual = parse(source);
assert_snapshot!(actual);
}
/// Snapshot test: runs `parse` on the input `"hello world` (opening quote
/// with no closing quote) and compares the result with the stored snapshot.
#[test]
fn unterminated_double_quotes() {
// Raw string literal: only the opening double quote is part of the input.
let source = r#""hello world"#;
// `parse` is this test module's helper, defined outside this view.
let actual = parse(source);
assert_snapshot!(actual);
}

View file

@ -143,3 +143,30 @@ fn reject_double_pipe() {
assert_snapshot!(actual);
}
/// Snapshot test: runs `parse` on the input `"hello world"` (a complete
/// double-quoted string containing a space) and compares the result with the
/// stored snapshot.
#[test]
fn double_quote_string() {
// Raw string literal so the surrounding double quotes are part of the input.
let source = r#""hello world""#;
// `parse` is this test module's helper, defined outside this view.
let actual = parse(source);
assert_snapshot!(actual);
}
/// Snapshot test: runs `parse` on the input `"hello \" world"`, a
/// double-quoted string that contains a backslash-escaped double quote, and
/// compares the result with the stored snapshot.
#[test]
fn escaped_char_in_double_quotes() {
// Raw string literal: the backslash and all three quotes reach the parser verbatim.
let source = r#""hello \" world""#;
// `parse` is this test module's helper, defined outside this view.
let actual = parse(source);
assert_snapshot!(actual);
}
/// Snapshot test: runs `parse` on the input `"hello world` (opening quote
/// with no closing quote) and compares the result with the stored snapshot.
#[test]
fn unterminated_double_quotes() {
// Raw string literal: only the opening double quote is part of the input.
let source = r#""hello world"#;
// `parse` is this test module's helper, defined outside this view.
let actual = parse(source);
assert_snapshot!(actual);
}

View file

@ -81,3 +81,30 @@ fn comment() {
assert_snapshot!(actual);
}
/// Snapshot test: lexes a lone `"` with the command-mode tokenizer and
/// compares the first token with the stored snapshot.
#[test]
fn double_quotes_in_command() {
// Raw string literal whose entire content is a single double-quote character.
let source = r#"""#;
// Only the first token produced by a fresh lexer is inspected.
let actual = Lexer::new(source).next_command_token();
assert_snapshot!(actual);
}
/// Snapshot test: lexes a lone `"` with the string-mode tokenizer and
/// compares the first token with the stored snapshot.
#[test]
fn double_quotes_in_string() {
// Raw string literal whose entire content is a single double-quote character.
let source = r#"""#;
// Only the first token produced by a fresh lexer is inspected.
let actual = Lexer::new(source).next_string_token();
assert_snapshot!(actual);
}
/// Snapshot test: lexes `\"` (backslash followed by a double quote) with the
/// string-mode tokenizer and compares the first token with the stored snapshot.
#[test]
fn escaped_quotes_in_string() {
// Raw string literal: the input is exactly two characters, `\` and `"`.
let source = r#"\""#;
// Only the first token produced by a fresh lexer is inspected.
let actual = Lexer::new(source).next_string_token();
assert_snapshot!(actual);
}

View file

@ -143,3 +143,30 @@ fn reject_double_pipe() {
assert_snapshot!(actual);
}
/// Snapshot test: runs `parse` on the input `"hello world"` (a complete
/// double-quoted string containing a space) and compares the result with the
/// stored snapshot.
#[test]
fn double_quote_string() {
// Raw string literal so the surrounding double quotes are part of the input.
let source = r#""hello world""#;
// `parse` is this test module's helper, defined outside this view.
let actual = parse(source);
assert_snapshot!(actual);
}
/// Snapshot test: runs `parse` on the input `"hello \" world"`, a
/// double-quoted string that contains a backslash-escaped double quote, and
/// compares the result with the stored snapshot.
#[test]
fn escaped_char_in_double_quotes() {
// Raw string literal: the backslash and all three quotes reach the parser verbatim.
let source = r#""hello \" world""#;
// `parse` is this test module's helper, defined outside this view.
let actual = parse(source);
assert_snapshot!(actual);
}
/// Snapshot test: runs `parse` on the input `"hello world` (opening quote
/// with no closing quote) and compares the result with the stored snapshot.
#[test]
fn unterminated_double_quotes() {
// Raw string literal: only the opening double quote is part of the input.
let source = r#""hello world"#;
// `parse` is this test module's helper, defined outside this view.
let actual = parse(source);
assert_snapshot!(actual);
}

View file

@ -0,0 +1,28 @@
---
source: crates/oyster_parser/tests/it/ast.rs
expression: actual
---
Ok(
Code(
[
Pipeline(
Pipeline(
[
Command(
[
Word(
[
Text(
"hello world",
),
],
),
],
None,
),
],
),
),
],
),
)

View file

@ -0,0 +1,34 @@
---
source: crates/oyster_parser/tests/it/ast.rs
expression: actual
---
Ok(
Code(
[
Pipeline(
Pipeline(
[
Command(
[
Word(
[
Text(
"hello ",
),
Text(
"\"",
),
Text(
" world",
),
],
),
],
None,
),
],
),
),
],
),
)

View file

@ -0,0 +1,7 @@
---
source: crates/oyster_parser/tests/it/ast.rs
expression: actual
---
Err(
UnexpectedEof,
)

View file

@ -15,7 +15,7 @@ Tree {
kind: Word,
children: [
Leaf {
kind: PlainWord,
kind: PlainText,
len: 6,
},
],
@ -49,7 +49,7 @@ Tree {
kind: Word,
children: [
Leaf {
kind: PlainWord,
kind: PlainText,
len: 2,
},
],
@ -62,7 +62,7 @@ Tree {
kind: Word,
children: [
Leaf {
kind: PlainWord,
kind: PlainText,
len: 2,
},
],

View file

@ -0,0 +1,41 @@
---
source: crates/oyster_parser/tests/it/cst.rs
expression: actual
---
Tree {
kind: Program,
children: [
Tree {
kind: Pipeline,
children: [
Tree {
kind: Command,
children: [
Tree {
kind: Word,
children: [
Tree {
kind: DQuotedString,
children: [
Leaf {
kind: DoubleQuote,
len: 1,
},
Leaf {
kind: PlainText,
len: 11,
},
Leaf {
kind: DoubleQuote,
len: 1,
},
],
},
],
},
],
},
],
},
],
}

View file

@ -0,0 +1,49 @@
---
source: crates/oyster_parser/tests/it/cst.rs
expression: actual
---
Tree {
kind: Program,
children: [
Tree {
kind: Pipeline,
children: [
Tree {
kind: Command,
children: [
Tree {
kind: Word,
children: [
Tree {
kind: DQuotedString,
children: [
Leaf {
kind: DoubleQuote,
len: 1,
},
Leaf {
kind: PlainText,
len: 6,
},
Leaf {
kind: EscapedChar,
len: 2,
},
Leaf {
kind: PlainText,
len: 6,
},
Leaf {
kind: DoubleQuote,
len: 1,
},
],
},
],
},
],
},
],
},
],
}

View file

@ -15,7 +15,7 @@ Tree {
kind: Word,
children: [
Leaf {
kind: PlainWord,
kind: PlainText,
len: 6,
},
],

View file

@ -15,7 +15,7 @@ Tree {
kind: Word,
children: [
Leaf {
kind: PlainWord,
kind: PlainText,
len: 6,
},
],
@ -41,7 +41,7 @@ Tree {
kind: Word,
children: [
Leaf {
kind: PlainWord,
kind: PlainText,
len: 2,
},
],
@ -54,7 +54,7 @@ Tree {
kind: Word,
children: [
Leaf {
kind: PlainWord,
kind: PlainText,
len: 2,
},
],

View file

@ -15,7 +15,7 @@ Tree {
kind: Word,
children: [
Leaf {
kind: PlainWord,
kind: PlainText,
len: 6,
},
],
@ -41,7 +41,7 @@ Tree {
kind: Word,
children: [
Leaf {
kind: PlainWord,
kind: PlainText,
len: 3,
},
],

View file

@ -15,7 +15,7 @@ Tree {
kind: Word,
children: [
Leaf {
kind: PlainWord,
kind: PlainText,
len: 6,
},
],
@ -49,7 +49,7 @@ Tree {
kind: Word,
children: [
Leaf {
kind: PlainWord,
kind: PlainText,
len: 3,
},
],

View file

@ -15,7 +15,7 @@ Tree {
kind: Word,
children: [
Leaf {
kind: PlainWord,
kind: PlainText,
len: 6,
},
],
@ -46,7 +46,7 @@ Tree {
kind: Word,
children: [
Leaf {
kind: PlainWord,
kind: PlainText,
len: 3,
},
],

View file

@ -15,7 +15,7 @@ Tree {
kind: Word,
children: [
Leaf {
kind: PlainWord,
kind: PlainText,
len: 6,
},
],
@ -54,7 +54,7 @@ Tree {
kind: Word,
children: [
Leaf {
kind: PlainWord,
kind: PlainText,
len: 3,
},
],

View file

@ -15,7 +15,7 @@ Tree {
kind: Word,
children: [
Leaf {
kind: PlainWord,
kind: PlainText,
len: 6,
},
],

View file

@ -15,7 +15,7 @@ Tree {
kind: Word,
children: [
Leaf {
kind: PlainWord,
kind: PlainText,
len: 5,
},
],
@ -42,7 +42,7 @@ Tree {
kind: Word,
children: [
Leaf {
kind: PlainWord,
kind: PlainText,
len: 2,
},
],

View file

@ -15,7 +15,7 @@ Tree {
kind: Word,
children: [
Leaf {
kind: PlainWord,
kind: PlainText,
len: 4,
},
],
@ -28,7 +28,7 @@ Tree {
kind: Word,
children: [
Leaf {
kind: PlainWord,
kind: PlainText,
len: 2,
},
],

View file

@ -15,7 +15,7 @@ Tree {
kind: Word,
children: [
Leaf {
kind: PlainWord,
kind: PlainText,
len: 6,
},
],

View file

@ -0,0 +1,41 @@
---
source: crates/oyster_parser/tests/it/cst.rs
expression: actual
---
Tree {
kind: Program,
children: [
Tree {
kind: Pipeline,
children: [
Tree {
kind: Command,
children: [
Tree {
kind: Word,
children: [
Tree {
kind: DQuotedString,
children: [
Leaf {
kind: DoubleQuote,
len: 1,
},
Leaf {
kind: PlainText,
len: 11,
},
Error {
kind: UnexpectedEof,
len: 0,
},
],
},
],
},
],
},
],
},
],
}

View file

@ -15,7 +15,7 @@ Tree {
kind: Word,
children: [
Leaf {
kind: PlainWord,
kind: PlainText,
len: 4,
},
],

View file

@ -15,7 +15,7 @@ Tree {
kind: Word,
children: [
Leaf {
kind: PlainWord,
kind: PlainText,
len: 5,
},
Leaf {
@ -23,7 +23,7 @@ Tree {
len: 2,
},
Leaf {
kind: PlainWord,
kind: PlainText,
len: 5,
},
],

View file

@ -0,0 +1,8 @@
---
source: crates/oyster_parser/tests/it/lexer.rs
expression: actual
---
Token {
kind: DoubleQuote,
len: 1,
}

View file

@ -0,0 +1,8 @@
---
source: crates/oyster_parser/tests/it/lexer.rs
expression: actual
---
Token {
kind: DoubleQuote,
len: 1,
}

View file

@ -0,0 +1,8 @@
---
source: crates/oyster_parser/tests/it/lexer.rs
expression: actual
---
Token {
kind: EscapedChar,
len: 2,
}

View file

@ -3,6 +3,6 @@ source: crates/oyster_parser/tests/it/lexer.rs
expression: actual
---
Token {
kind: PlainWord,
kind: PlainText,
len: 6,
}

View file

@ -3,6 +3,6 @@ source: crates/oyster_parser/tests/it/lexer.rs
expression: actual
---
Token {
kind: PlainWord,
kind: PlainText,
len: 13,
}

View file

@ -13,7 +13,7 @@ expression: actual
Word,
),
NewLeaf(
PlainWord,
PlainText,
6,
),
EndNode,
@ -45,7 +45,7 @@ expression: actual
Word,
),
NewLeaf(
PlainWord,
PlainText,
2,
),
EndNode,
@ -57,7 +57,7 @@ expression: actual
Word,
),
NewLeaf(
PlainWord,
PlainText,
2,
),
EndNode,

View file

@ -0,0 +1,34 @@
---
source: crates/oyster_parser/tests/it/parser.rs
expression: actual
---
[
StartNode(
Pipeline,
),
StartNode(
Command,
),
StartNode(
Word,
),
StartNode(
DQuotedString,
),
NewLeaf(
DoubleQuote,
1,
),
NewLeaf(
PlainText,
11,
),
NewLeaf(
DoubleQuote,
1,
),
EndNode,
EndNode,
EndNode,
EndNode,
]

View file

@ -0,0 +1,42 @@
---
source: crates/oyster_parser/tests/it/parser.rs
expression: actual
---
[
StartNode(
Pipeline,
),
StartNode(
Command,
),
StartNode(
Word,
),
StartNode(
DQuotedString,
),
NewLeaf(
DoubleQuote,
1,
),
NewLeaf(
PlainText,
6,
),
NewLeaf(
EscapedChar,
2,
),
NewLeaf(
PlainText,
6,
),
NewLeaf(
DoubleQuote,
1,
),
EndNode,
EndNode,
EndNode,
EndNode,
]

View file

@ -13,7 +13,7 @@ expression: actual
Word,
),
NewLeaf(
PlainWord,
PlainText,
6,
),
EndNode,

View file

@ -13,7 +13,7 @@ expression: actual
Word,
),
NewLeaf(
PlainWord,
PlainText,
6,
),
EndNode,
@ -37,7 +37,7 @@ expression: actual
Word,
),
NewLeaf(
PlainWord,
PlainText,
2,
),
EndNode,
@ -49,7 +49,7 @@ expression: actual
Word,
),
NewLeaf(
PlainWord,
PlainText,
2,
),
EndNode,

View file

@ -13,7 +13,7 @@ expression: actual
Word,
),
NewLeaf(
PlainWord,
PlainText,
6,
),
EndNode,
@ -37,7 +37,7 @@ expression: actual
Word,
),
NewLeaf(
PlainWord,
PlainText,
3,
),
EndNode,

View file

@ -13,7 +13,7 @@ expression: actual
Word,
),
NewLeaf(
PlainWord,
PlainText,
6,
),
EndNode,
@ -45,7 +45,7 @@ expression: actual
Word,
),
NewLeaf(
PlainWord,
PlainText,
3,
),
EndNode,

View file

@ -13,7 +13,7 @@ expression: actual
Word,
),
NewLeaf(
PlainWord,
PlainText,
6,
),
EndNode,
@ -41,7 +41,7 @@ expression: actual
Word,
),
NewLeaf(
PlainWord,
PlainText,
3,
),
EndNode,

View file

@ -13,7 +13,7 @@ expression: actual
Word,
),
NewLeaf(
PlainWord,
PlainText,
6,
),
EndNode,
@ -49,7 +49,7 @@ expression: actual
Word,
),
NewLeaf(
PlainWord,
PlainText,
3,
),
EndNode,

View file

@ -13,7 +13,7 @@ expression: actual
Word,
),
NewLeaf(
PlainWord,
PlainText,
6,
),
EndNode,

View file

@ -13,7 +13,7 @@ expression: actual
Word,
),
NewLeaf(
PlainWord,
PlainText,
5,
),
EndNode,
@ -37,7 +37,7 @@ expression: actual
Word,
),
NewLeaf(
PlainWord,
PlainText,
2,
),
EndNode,

View file

@ -13,7 +13,7 @@ expression: actual
Word,
),
NewLeaf(
PlainWord,
PlainText,
4,
),
EndNode,
@ -25,7 +25,7 @@ expression: actual
Word,
),
NewLeaf(
PlainWord,
PlainText,
2,
),
EndNode,

View file

@ -13,7 +13,7 @@ expression: actual
Word,
),
NewLeaf(
PlainWord,
PlainText,
6,
),
EndNode,

View file

@ -0,0 +1,34 @@
---
source: crates/oyster_parser/tests/it/parser.rs
expression: actual
---
[
StartNode(
Pipeline,
),
StartNode(
Command,
),
StartNode(
Word,
),
StartNode(
DQuotedString,
),
NewLeaf(
DoubleQuote,
1,
),
NewLeaf(
PlainText,
11,
),
Error(
UnexpectedEof,
0,
),
EndNode,
EndNode,
EndNode,
EndNode,
]

View file

@ -13,7 +13,7 @@ expression: actual
Word,
),
NewLeaf(
PlainWord,
PlainText,
4,
),
EndNode,

View file

@ -13,7 +13,7 @@ expression: actual
Word,
),
NewLeaf(
PlainWord,
PlainText,
5,
),
NewLeaf(
@ -21,7 +21,7 @@ expression: actual
2,
),
NewLeaf(
PlainWord,
PlainText,
4,
),
EndNode,