oysh/crates/oyster_parser/src/parser.rs

234 lines
8 KiB
Rust
Raw Normal View History

2022-07-14 22:26:57 +00:00
use thiserror::Error;
use crate::{lexer::Lexer, Token, TokenKind};
/// Errors that might occur during parsing.
#[derive(Debug, Error)]
pub enum ParseError {
#[error("unexpected pipe")]
UnexpectedPipe,
#[error("unexpected end of file")]
UnexpectedEof,
#[error("unexpected semicolon in the middle of statement")]
UnexpectedSemicolon,
#[error("unmatched closing parenthesis")]
UnexpectedClosingParenthesis,
2022-07-14 22:26:57 +00:00
}
/// Type of the node.
///
/// The same enum names both leaves (reported through `NewLeaf`) and inner
/// nodes (reported through `StartNode`), and it doubles as the parser's state:
/// the kind on top of the parser stack selects which grammar rule is active.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum NodeKind {
    // Leaves: each one corresponds to a single token.
    /// Whitespace between words.
    Whitespace,
    /// One or more line breaks.
    Newlines,
    /// A `;` statement separator.
    Semicolon,
    /// A pipe connecting two commands of a pipeline.
    Pipe,
    /// A double quote opening or closing a quoted string.
    DoubleQuote,
    /// The parenthesis that opens a command substitution.
    OpeningParenthesis,
    /// The parenthesis that closes a command substitution.
    ClosingParenthesis,
    /// Literal text.
    PlainText,
    /// An escaped character.
    EscapedChar,
    /// A comment.
    Comment,

    // Inner nodes: each one groups leaves and/or other nodes.
    /// Root of the tree; holds pipelines and the separators between them.
    Program,
    /// A parenthesised program nested inside a word or a quoted string.
    CommandSubstitution,
    /// One or more commands joined by pipes.
    Pipeline,
    /// A sequence of words.
    Command,
    /// A single word, made of text, escapes, quoted strings and substitutions.
    Word,
    /// A double-quoted string, including its delimiting quotes.
    DQuotedString,

    /// Parser-internal state: a pipe has been read, but the word starting the
    /// next command has not been seen yet. The parser never emits this kind
    /// in an event.
    PipelineCont,
}
/// Events required to build a syntax tree.
///
/// The parser yields a flat stream of these; a consumer replays them in order
/// to build whatever tree representation it needs.
#[derive(Debug)]
pub enum ParseEvent {
    /// A syntax error. The `usize` is the `len` of the offending token, which
    /// the parser skips after reporting it (unit is whatever the lexer uses
    /// for `Token::len` — presumably bytes; confirm against the lexer).
    Error(ParseError, usize),
    /// Open a new inner node of the given kind; subsequent events are its
    /// children until the matching `EndNode`.
    StartNode(NodeKind),
    /// Close the most recently opened inner node.
    EndNode,
    /// Add a leaf of the given kind to the current node. The `usize` is the
    /// `len` of the token the leaf was created from.
    NewLeaf(NodeKind, usize),
}
/// Parser for a given source code string.
///
/// Parsing is decoupled from building the AST and the parse tree: instead of
/// returning a tree directly, the parser is an iterator over [`ParseEvent`]s,
/// a linear representation containing the instructions needed to construct
/// the trees.
///
/// The outermost `StartNode(Program)` and the final `EndNode` are implied and
/// are not part of the event stream.
pub struct Parser<'a> {
    /// Token source.
    lex: Lexer<'a>,
    /// The next token, already read from the lexer but not yet consumed.
    lookahead: Token,
    /// An event queued by the previous step; it is returned first on the next
    /// call, before any further parsing happens.
    buffer: Option<ParseEvent>,
    /// Kinds of the currently open nodes, innermost last. The top entry
    /// selects which grammar rule handles the lookahead token.
    stack: Vec<NodeKind>,
}
impl Parser<'_> {
    /// Create a new parser over `source`.
    ///
    /// The first token is read eagerly (in command mode) so that a lookahead
    /// is always available, and the node stack starts out holding the implied
    /// `Program` root. No event is emitted for that root.
    pub fn new(source: &str) -> Parser<'_> {
        let mut lex = Lexer::new(source);
        let lookahead = lex.next_command_token();

        // Program > Pipeline > Command > Word plus one nested node fit in the
        // initial allocation; deeper nesting simply grows the vector.
        let mut stack = Vec::with_capacity(5);
        stack.push(NodeKind::Program);

        Parser {
            lex,
            lookahead,
            buffer: None,
            stack,
        }
    }
}
impl Iterator for Parser<'_> {
type Item = ParseEvent;
fn next(&mut self) -> Option<Self::Item> {
// This is essentially a recursive descent parser.
// The macros somewhat represent things you would do in a true RD implementation.
macro_rules! chain {
($a:expr, $b:expr) => {{
$b;
$a
}};
}
macro_rules! chain_buf {
($a:expr, $b:expr) => {{
self.buffer = $b;
$a
}};
}
macro_rules! call {
($rule:expr) => {{
use NodeKind::*;
self.stack.push($rule);
Some(ParseEvent::StartNode($rule))
}};
}
macro_rules! ret {
() => {{
self.stack.pop();
Some(ParseEvent::EndNode)
}};
}
macro_rules! leaf {
($type:expr $(, $mode:ident)?) => {{
2022-07-14 22:26:57 +00:00
use NodeKind::*;
let len = self.lookahead.len;
self.lookahead = next_token!($($mode)?);
2022-07-14 22:26:57 +00:00
Some(ParseEvent::NewLeaf($type, len))
}};
}
macro_rules! error {
($type:expr $(, $mode:ident)?) => {{
2022-07-14 22:26:57 +00:00
use ParseError::*;
let len = self.lookahead.len;
self.lookahead = next_token!($($mode)?);
2022-07-14 22:26:57 +00:00
Some(ParseEvent::Error($type, len))
}};
}
macro_rules! next_token {
() => {
self.lex.next_command_token()
};
(Command) => {
self.lex.next_command_token()
};
(String) => {
self.lex.next_string_token()
};
}
2022-07-14 22:26:57 +00:00
use TokenKind::*;
if let Some(ev) = self.buffer.take() {
return Some(ev);
}
match self.stack.last() {
None => None,
Some(nt) => match nt {
// XXX: unify Program and CommandSubstitution to avoid duplication
2022-07-14 22:26:57 +00:00
NodeKind::Program => match self.lookahead.kind {
Whitespace => leaf!(Whitespace),
Newlines => leaf!(Newlines),
Semicolon => leaf!(Semicolon),
Comment => leaf!(Comment),
PlainText | DoubleQuote | OpeningParenthesis | EscapedChar => call!(Pipeline),
2022-07-14 22:26:57 +00:00
Pipe => error!(UnexpectedPipe),
ClosingParenthesis => error!(UnexpectedClosingParenthesis),
2022-07-14 22:26:57 +00:00
Eof => chain!(None, ret!()), // return silently
},
NodeKind::CommandSubstitution => match self.lookahead.kind {
Whitespace => leaf!(Whitespace),
Newlines => leaf!(Newlines),
Semicolon => leaf!(Semicolon),
Comment => leaf!(Comment),
PlainText | DoubleQuote | OpeningParenthesis | EscapedChar => call!(Pipeline),
ClosingParenthesis => match self.stack.get(self.stack.len() - 2) {
Some(&NodeKind::DQuotedString) => {
chain_buf!(leaf!(ClosingParenthesis, String), ret!())
}
_ => chain_buf!(leaf!(ClosingParenthesis, Command), ret!()),
},
Pipe => error!(UnexpectedPipe),
Eof => chain_buf!(error!(UnexpectedEof), ret!()),
},
2022-07-14 22:26:57 +00:00
NodeKind::Pipeline => match self.lookahead.kind {
Whitespace => leaf!(Whitespace),
Comment => leaf!(Comment),
Pipe => chain!(leaf!(Pipe), call!(PipelineCont)),
PlainText | DoubleQuote | OpeningParenthesis | EscapedChar => call!(Command),
Newlines | Semicolon | ClosingParenthesis | Eof => ret!(),
2022-07-14 22:26:57 +00:00
},
NodeKind::PipelineCont => match self.lookahead.kind {
Whitespace => leaf!(Whitespace),
Newlines => leaf!(Newlines),
Comment => leaf!(Comment),
PlainText | DoubleQuote | OpeningParenthesis | EscapedChar => {
chain!(call!(Command), ret!())
}
2022-07-14 22:26:57 +00:00
Semicolon => chain_buf!(chain!(error!(UnexpectedSemicolon), ret!()), ret!()),
Pipe => chain!(error!(UnexpectedPipe), ret!()),
ClosingParenthesis => chain!(error!(UnexpectedClosingParenthesis), ret!()),
2022-07-14 22:26:57 +00:00
Eof => chain!(error!(UnexpectedEof), ret!()),
},
NodeKind::Command => match self.lookahead.kind {
Whitespace => leaf!(Whitespace),
Comment => leaf!(Comment),
PlainText | DoubleQuote | OpeningParenthesis | EscapedChar => call!(Word),
Newlines | Semicolon | Pipe | ClosingParenthesis | Eof => ret!(),
2022-07-14 22:26:57 +00:00
},
NodeKind::Word => match self.lookahead.kind {
PlainText => leaf!(PlainText),
2022-07-14 22:26:57 +00:00
EscapedChar => leaf!(EscapedChar),
DoubleQuote => chain_buf!(call!(DQuotedString), leaf!(DoubleQuote, String)),
OpeningParenthesis => {
chain_buf!(call!(CommandSubstitution), leaf!(OpeningParenthesis))
}
Comment | Whitespace | Newlines | Semicolon | Pipe | ClosingParenthesis
| Eof => ret!(),
2022-07-14 22:26:57 +00:00
},
NodeKind::DQuotedString => match self.lookahead.kind {
PlainText => leaf!(PlainText, String),
EscapedChar => leaf!(EscapedChar, String),
DoubleQuote => chain_buf!(leaf!(DoubleQuote, Command), ret!()),
OpeningParenthesis => chain_buf!(
call!(CommandSubstitution),
leaf!(OpeningParenthesis, Command)
),
ClosingParenthesis => error!(UnexpectedClosingParenthesis, String),
Eof => chain_buf!(error!(UnexpectedEof, Command), ret!()),
_ => unreachable!(),
},
2022-07-14 22:26:57 +00:00
_ => unreachable!(),
},
}
}
}