use thiserror::Error;

use crate::{lexer::Lexer, Token, TokenKind};

/// Errors that might occur during parsing.
|
|
|
|
#[derive(Debug, Error)]
|
|
|
|
pub enum ParseError {
|
|
|
|
#[error("unexpected pipe")]
|
|
|
|
UnexpectedPipe,
|
|
|
|
#[error("unexpected end of file")]
|
|
|
|
UnexpectedEof,
|
|
|
|
#[error("unexpected semicolon in the middle of statement")]
|
|
|
|
UnexpectedSemicolon,
|
2022-10-24 19:27:04 +00:00
|
|
|
#[error("unmatched closing parenthesis")]
|
|
|
|
UnexpectedClosingParenthesis,
|
2022-07-14 22:26:57 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/// Type of the node.
///
/// The variants fall into three groups: leaves that wrap a single lexer
/// token, inner nodes that are opened and closed by the parser, and one
/// parser-internal state that never shows up in the emitted events.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum NodeKind {
    // Leaves: reported through `ParseEvent::NewLeaf`, one per token.
    Whitespace,
    Newlines,
    Semicolon,
    Pipe,
    DoubleQuote,
    OpeningParenthesis,
    ClosingParenthesis,
    PlainText,
    EscapedChar,
    Comment,

    // Inner nodes: bracketed by `ParseEvent::StartNode` / `ParseEvent::EndNode`.
    // `Program` is the root; the parser starts inside it without announcing it.
    Program,
    CommandSubstitution,
    Pipeline,
    Command,
    Word,
    DQuotedString,

    /// Read a pipe but didn't start word yet.
    ///
    /// Only used on the parser's internal stack; no `StartNode` is ever
    /// emitted for it.
    PipelineCont,
}

/// Events required to build a syntax tree.
|
|
|
|
#[derive(Debug)]
|
|
|
|
pub enum ParseEvent {
|
|
|
|
Error(ParseError, usize),
|
|
|
|
StartNode(NodeKind),
|
|
|
|
EndNode,
|
|
|
|
NewLeaf(NodeKind, usize),
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Parse a given source code string.
|
|
|
|
/// The parsing is decoupled from building the AST and parse tree,
|
|
|
|
/// instead of returning the tree directly, it returns a linear representation,
|
|
|
|
/// containing instructions on how to construct the trees.
|
|
|
|
/// StartNode(Program) and EndNode at the end is implied.
|
|
|
|
pub struct Parser<'a> {
|
|
|
|
lex: Lexer<'a>,
|
|
|
|
lookahead: Token,
|
|
|
|
buffer: Option<ParseEvent>,
|
|
|
|
stack: Vec<NodeKind>,
|
|
|
|
}
|
|
|
|
|
|
|
|
impl Parser<'_> {
|
|
|
|
/// Create a new parser.
|
|
|
|
pub fn new(source: &str) -> Parser {
|
|
|
|
let mut lex = Lexer::new(source);
|
|
|
|
let lookahead = lex.next_command_token();
|
|
|
|
let mut stack = Vec::with_capacity(5);
|
|
|
|
stack.push(NodeKind::Program);
|
|
|
|
|
|
|
|
Parser {
|
|
|
|
lex,
|
|
|
|
lookahead,
|
|
|
|
buffer: None,
|
|
|
|
stack,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
impl Iterator for Parser<'_> {
|
|
|
|
type Item = ParseEvent;
|
|
|
|
|
|
|
|
fn next(&mut self) -> Option<Self::Item> {
|
|
|
|
// This is essentially a recursive descent parser.
|
|
|
|
// The macros somewhat represent things you would do in a true RD implementation.
|
|
|
|
|
|
|
|
macro_rules! chain {
|
|
|
|
($a:expr, $b:expr) => {{
|
|
|
|
$b;
|
|
|
|
$a
|
|
|
|
}};
|
|
|
|
}
|
|
|
|
|
|
|
|
macro_rules! chain_buf {
|
|
|
|
($a:expr, $b:expr) => {{
|
|
|
|
self.buffer = $b;
|
|
|
|
$a
|
|
|
|
}};
|
|
|
|
}
|
|
|
|
|
|
|
|
macro_rules! call {
|
|
|
|
($rule:expr) => {{
|
|
|
|
use NodeKind::*;
|
|
|
|
self.stack.push($rule);
|
|
|
|
Some(ParseEvent::StartNode($rule))
|
|
|
|
}};
|
|
|
|
}
|
|
|
|
|
|
|
|
macro_rules! ret {
|
|
|
|
() => {{
|
|
|
|
self.stack.pop();
|
|
|
|
Some(ParseEvent::EndNode)
|
|
|
|
}};
|
|
|
|
}
|
|
|
|
|
|
|
|
macro_rules! leaf {
|
2022-09-24 17:17:56 +00:00
|
|
|
($type:expr $(, $mode:ident)?) => {{
|
2022-07-14 22:26:57 +00:00
|
|
|
use NodeKind::*;
|
|
|
|
let len = self.lookahead.len;
|
2022-09-24 17:17:56 +00:00
|
|
|
self.lookahead = next_token!($($mode)?);
|
2022-07-14 22:26:57 +00:00
|
|
|
Some(ParseEvent::NewLeaf($type, len))
|
|
|
|
}};
|
|
|
|
}
|
|
|
|
|
|
|
|
macro_rules! error {
|
2022-09-24 17:17:56 +00:00
|
|
|
($type:expr $(, $mode:ident)?) => {{
|
2022-07-14 22:26:57 +00:00
|
|
|
use ParseError::*;
|
|
|
|
let len = self.lookahead.len;
|
2022-09-24 17:17:56 +00:00
|
|
|
self.lookahead = next_token!($($mode)?);
|
2022-07-14 22:26:57 +00:00
|
|
|
Some(ParseEvent::Error($type, len))
|
|
|
|
}};
|
|
|
|
}
|
|
|
|
|
2022-09-24 17:17:56 +00:00
|
|
|
macro_rules! next_token {
|
|
|
|
() => {
|
|
|
|
self.lex.next_command_token()
|
|
|
|
};
|
|
|
|
(Command) => {
|
|
|
|
self.lex.next_command_token()
|
|
|
|
};
|
|
|
|
(String) => {
|
|
|
|
self.lex.next_string_token()
|
|
|
|
};
|
|
|
|
}
|
|
|
|
|
2022-07-14 22:26:57 +00:00
|
|
|
if let Some(ev) = self.buffer.take() {
|
|
|
|
return Some(ev);
|
|
|
|
}
|
|
|
|
|
2022-11-06 22:41:06 +00:00
|
|
|
use TokenKind::*;
|
|
|
|
|
|
|
|
match self.stack.last()? {
|
2022-11-06 23:03:31 +00:00
|
|
|
NodeKind::Program | NodeKind::CommandSubstitution => match self.lookahead.kind {
|
2022-11-06 22:41:06 +00:00
|
|
|
Whitespace => leaf!(Whitespace),
|
|
|
|
Newlines => leaf!(Newlines),
|
|
|
|
Semicolon => leaf!(Semicolon),
|
|
|
|
Comment => leaf!(Comment),
|
|
|
|
PlainText | DoubleQuote | OpeningParenthesis | EscapedChar => call!(Pipeline),
|
|
|
|
Pipe => error!(UnexpectedPipe),
|
2022-11-06 23:03:31 +00:00
|
|
|
ClosingParenthesis => match self.stack.last().unwrap() {
|
|
|
|
NodeKind::Program => error!(UnexpectedClosingParenthesis),
|
|
|
|
NodeKind::CommandSubstitution => match self.stack.get(self.stack.len() - 2) {
|
|
|
|
Some(&NodeKind::DQuotedString) => {
|
|
|
|
chain_buf!(leaf!(ClosingParenthesis, String), ret!())
|
|
|
|
}
|
|
|
|
_ => chain_buf!(leaf!(ClosingParenthesis, Command), ret!()),
|
|
|
|
},
|
|
|
|
_ => unreachable!(),
|
|
|
|
},
|
|
|
|
Eof => match self.stack.last().unwrap() {
|
|
|
|
NodeKind::Program => chain!(None, ret!()), // return silently
|
|
|
|
NodeKind::CommandSubstitution => chain_buf!(error!(UnexpectedEof), ret!()),
|
|
|
|
_ => unreachable!(),
|
2022-09-24 17:17:56 +00:00
|
|
|
},
|
2022-11-06 22:41:06 +00:00
|
|
|
},
|
|
|
|
NodeKind::Pipeline => match self.lookahead.kind {
|
|
|
|
Whitespace => leaf!(Whitespace),
|
|
|
|
Comment => leaf!(Comment),
|
|
|
|
Pipe => chain!(leaf!(Pipe), call!(PipelineCont)),
|
|
|
|
PlainText | DoubleQuote | OpeningParenthesis | EscapedChar => call!(Command),
|
|
|
|
Newlines | Semicolon | ClosingParenthesis | Eof => ret!(),
|
|
|
|
},
|
|
|
|
NodeKind::PipelineCont => match self.lookahead.kind {
|
|
|
|
Whitespace => leaf!(Whitespace),
|
|
|
|
Newlines => leaf!(Newlines),
|
|
|
|
Comment => leaf!(Comment),
|
|
|
|
PlainText | DoubleQuote | OpeningParenthesis | EscapedChar => {
|
|
|
|
chain!(call!(Command), ret!())
|
|
|
|
}
|
|
|
|
Semicolon => chain_buf!(chain!(error!(UnexpectedSemicolon), ret!()), ret!()),
|
|
|
|
Pipe => chain!(error!(UnexpectedPipe), ret!()),
|
|
|
|
ClosingParenthesis => chain!(error!(UnexpectedClosingParenthesis), ret!()),
|
|
|
|
Eof => chain!(error!(UnexpectedEof), ret!()),
|
|
|
|
},
|
|
|
|
NodeKind::Command => match self.lookahead.kind {
|
|
|
|
Whitespace => leaf!(Whitespace),
|
|
|
|
Comment => leaf!(Comment),
|
|
|
|
PlainText | DoubleQuote | OpeningParenthesis | EscapedChar => call!(Word),
|
|
|
|
Newlines | Semicolon | Pipe | ClosingParenthesis | Eof => ret!(),
|
|
|
|
},
|
|
|
|
NodeKind::Word => match self.lookahead.kind {
|
|
|
|
PlainText => leaf!(PlainText),
|
|
|
|
EscapedChar => leaf!(EscapedChar),
|
|
|
|
DoubleQuote => chain_buf!(call!(DQuotedString), leaf!(DoubleQuote, String)),
|
|
|
|
OpeningParenthesis => {
|
|
|
|
chain_buf!(call!(CommandSubstitution), leaf!(OpeningParenthesis))
|
|
|
|
}
|
|
|
|
Comment | Whitespace | Newlines | Semicolon | Pipe | ClosingParenthesis | Eof => {
|
|
|
|
ret!()
|
|
|
|
}
|
|
|
|
},
|
|
|
|
NodeKind::DQuotedString => match self.lookahead.kind {
|
|
|
|
PlainText => leaf!(PlainText, String),
|
|
|
|
EscapedChar => leaf!(EscapedChar, String),
|
|
|
|
DoubleQuote => chain_buf!(leaf!(DoubleQuote, Command), ret!()),
|
|
|
|
OpeningParenthesis => chain_buf!(
|
|
|
|
call!(CommandSubstitution),
|
|
|
|
leaf!(OpeningParenthesis, Command)
|
|
|
|
),
|
|
|
|
ClosingParenthesis => error!(UnexpectedClosingParenthesis, String),
|
|
|
|
Eof => chain_buf!(error!(UnexpectedEof, Command), ret!()),
|
2022-07-14 22:26:57 +00:00
|
|
|
_ => unreachable!(),
|
|
|
|
},
|
2022-11-06 22:41:06 +00:00
|
|
|
_ => unreachable!(),
|
2022-07-14 22:26:57 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|