use thiserror::Error; use crate::{lexer::Lexer, Token, TokenKind}; /// Errors that might occur during parsing. #[derive(Debug, Error)] pub enum ParseError { #[error("unexpected pipe")] UnexpectedPipe, #[error("unexpected end of file")] UnexpectedEof, #[error("unexpected semicolon in the middle of statement")] UnexpectedSemicolon, } /// Type of the node. #[derive(Debug)] pub enum NodeKind { Whitespace, Newlines, Semicolon, Pipe, PlainWord, EscapedChar, Comment, Program, Pipeline, Command, Word, /// Read a pipe but didn't start word yet. PipelineCont, } /// Events required to build a syntax tree. #[derive(Debug)] pub enum ParseEvent { Error(ParseError, usize), StartNode(NodeKind), EndNode, NewLeaf(NodeKind, usize), } /// Parse a given source code string. /// The parsing is decoupled from building the AST and parse tree, /// instead of returning the tree directly, it returns a linear representation, /// containing instructions on how to construct the trees. /// StartNode(Program) and EndNode at the end is implied. #[derive(Debug)] pub struct Parser<'a> { lex: Lexer<'a>, lookahead: Token, buffer: Option, stack: Vec, } impl Parser<'_> { /// Create a new parser. pub fn new(source: &str) -> Parser { let mut lex = Lexer::new(source); let lookahead = lex.next_command_token(); let mut stack = Vec::with_capacity(5); stack.push(NodeKind::Program); Parser { lex, lookahead, buffer: None, stack, } } } impl Iterator for Parser<'_> { type Item = ParseEvent; fn next(&mut self) -> Option { // This is essentially a recursive descent parser. // The macros somewhat represent things you would do in a true RD implementation. macro_rules! chain { ($a:expr, $b:expr) => {{ $b; $a }}; } macro_rules! chain_buf { ($a:expr, $b:expr) => {{ self.buffer = $b; $a }}; } macro_rules! call { ($rule:expr) => {{ use NodeKind::*; self.stack.push($rule); Some(ParseEvent::StartNode($rule)) }}; } macro_rules! ret { () => {{ self.stack.pop(); Some(ParseEvent::EndNode) }}; } macro_rules! tailcall { ($rule:expr) => {{ use NodeKind::*; self.stack.pop(); self.stack.push($rule); Some(ParseEvent::StartNode($rule)) }}; } macro_rules! leaf { ($type:expr) => {{ use NodeKind::*; let len = self.lookahead.len; self.lookahead = self.lex.next_command_token(); Some(ParseEvent::NewLeaf($type, len)) }}; } macro_rules! error { ($type:expr) => {{ use ParseError::*; let len = self.lookahead.len; self.lookahead = self.lex.next_command_token(); Some(ParseEvent::Error($type, len)) }}; } use TokenKind::*; if let Some(ev) = self.buffer.take() { return Some(ev); } match self.stack.last() { None => None, Some(nt) => match nt { NodeKind::Program => match self.lookahead.kind { Whitespace => leaf!(Whitespace), Newlines => leaf!(Newlines), Semicolon => leaf!(Semicolon), Comment => leaf!(Comment), PlainWord | EscapedChar => call!(Pipeline), Pipe => error!(UnexpectedPipe), Eof => chain!(None, ret!()), // return silently }, NodeKind::Pipeline => match self.lookahead.kind { Whitespace => leaf!(Whitespace), Comment => leaf!(Comment), Pipe => chain!(leaf!(Pipe), call!(PipelineCont)), PlainWord | EscapedChar => call!(Command), Newlines | Semicolon | Eof => ret!(), }, NodeKind::PipelineCont => match self.lookahead.kind { Whitespace => leaf!(Whitespace), Newlines => leaf!(Newlines), Comment => leaf!(Comment), PlainWord | EscapedChar => tailcall!(Command), Semicolon => chain_buf!(chain!(error!(UnexpectedSemicolon), ret!()), ret!()), Pipe => chain!(error!(UnexpectedPipe), ret!()), Eof => chain!(error!(UnexpectedEof), ret!()), }, NodeKind::Command => match self.lookahead.kind { Whitespace => leaf!(Whitespace), Comment => leaf!(Comment), PlainWord | EscapedChar => call!(Word), Newlines | Semicolon | Eof => ret!(), Pipe => ret!(), }, NodeKind::Word => match self.lookahead.kind { PlainWord => leaf!(PlainWord), EscapedChar => leaf!(EscapedChar), Comment | Whitespace | Newlines | Semicolon | Pipe | Eof => ret!(), }, _ => unreachable!(), }, } } }