188 lines
5.5 KiB
Rust
188 lines
5.5 KiB
Rust
use thiserror::Error;
|
|
|
|
use crate::{lexer::Lexer, Token, TokenKind};
|
|
|
|
/// Errors that might occur during parsing.
|
|
#[derive(Debug, Error)]
|
|
pub enum ParseError {
|
|
#[error("unexpected pipe")]
|
|
UnexpectedPipe,
|
|
#[error("unexpected end of file")]
|
|
UnexpectedEof,
|
|
#[error("unexpected semicolon in the middle of statement")]
|
|
UnexpectedSemicolon,
|
|
}
|
|
|
|
/// Type of the node.
///
/// The variants fall into three groups:
///
/// * leaf kinds, which the parser reports through `ParseEvent::NewLeaf`;
/// * inner node kinds, which the parser uses as grammar rules and reports
///   through `ParseEvent::StartNode`;
/// * `PipelineCont`, which is only a parser state.
///
/// The enum is a plain tag without payload, so it is cheap to copy, compare
/// and hash.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum NodeKind {
    // --- Leaf kinds -------------------------------------------------------
    /// Whitespace leaf.
    Whitespace,
    /// Newlines leaf.
    Newlines,
    /// Semicolon leaf.
    Semicolon,
    /// Pipe leaf.
    Pipe,
    /// Plain word leaf; appears inside a `Word` node.
    PlainWord,
    /// Escaped character leaf; appears inside a `Word` node.
    EscapedChar,
    /// Comment leaf.
    Comment,

    // --- Inner node kinds -------------------------------------------------
    /// Root of the tree.
    Program,
    /// A pipeline, containing commands and the pipes between them.
    Pipeline,
    /// A single command, containing words.
    Command,
    /// A word, made of plain-word and escaped-character leaves.
    Word,

    // --- Parser-only state ------------------------------------------------
    /// Read a pipe but didn't start word yet.
    ///
    /// The parser keeps this on its rule stack only; it does not emit a
    /// `StartNode` event for it.
    PipelineCont,
}
|
|
|
|
/// Events required to build a syntax tree.
|
|
#[derive(Debug)]
|
|
pub enum ParseEvent {
|
|
Error(ParseError, usize),
|
|
StartNode(NodeKind),
|
|
EndNode,
|
|
NewLeaf(NodeKind, usize),
|
|
}
|
|
|
|
/// Parse a given source code string.
|
|
/// The parsing is decoupled from building the AST and parse tree,
|
|
/// instead of returning the tree directly, it returns a linear representation,
|
|
/// containing instructions on how to construct the trees.
|
|
/// StartNode(Program) and EndNode at the end is implied.
|
|
#[derive(Debug)]
|
|
pub struct Parser<'a> {
|
|
lex: Lexer<'a>,
|
|
lookahead: Token,
|
|
buffer: Option<ParseEvent>,
|
|
stack: Vec<NodeKind>,
|
|
}
|
|
|
|
impl Parser<'_> {
|
|
/// Create a new parser.
|
|
pub fn new(source: &str) -> Parser {
|
|
let mut lex = Lexer::new(source);
|
|
let lookahead = lex.next_command_token();
|
|
let mut stack = Vec::with_capacity(5);
|
|
stack.push(NodeKind::Program);
|
|
|
|
Parser {
|
|
lex,
|
|
lookahead,
|
|
buffer: None,
|
|
stack,
|
|
}
|
|
}
|
|
}
|
|
|
|
impl Iterator for Parser<'_> {
|
|
type Item = ParseEvent;
|
|
|
|
fn next(&mut self) -> Option<Self::Item> {
|
|
// This is essentially a recursive descent parser.
|
|
// The macros somewhat represent things you would do in a true RD implementation.
|
|
|
|
macro_rules! chain {
|
|
($a:expr, $b:expr) => {{
|
|
$b;
|
|
$a
|
|
}};
|
|
}
|
|
|
|
macro_rules! chain_buf {
|
|
($a:expr, $b:expr) => {{
|
|
self.buffer = $b;
|
|
$a
|
|
}};
|
|
}
|
|
|
|
macro_rules! call {
|
|
($rule:expr) => {{
|
|
use NodeKind::*;
|
|
self.stack.push($rule);
|
|
Some(ParseEvent::StartNode($rule))
|
|
}};
|
|
}
|
|
|
|
macro_rules! ret {
|
|
() => {{
|
|
self.stack.pop();
|
|
Some(ParseEvent::EndNode)
|
|
}};
|
|
}
|
|
|
|
macro_rules! tailcall {
|
|
($rule:expr) => {{
|
|
use NodeKind::*;
|
|
self.stack.pop();
|
|
self.stack.push($rule);
|
|
Some(ParseEvent::StartNode($rule))
|
|
}};
|
|
}
|
|
|
|
macro_rules! leaf {
|
|
($type:expr) => {{
|
|
use NodeKind::*;
|
|
let len = self.lookahead.len;
|
|
self.lookahead = self.lex.next_command_token();
|
|
Some(ParseEvent::NewLeaf($type, len))
|
|
}};
|
|
}
|
|
|
|
macro_rules! error {
|
|
($type:expr) => {{
|
|
use ParseError::*;
|
|
let len = self.lookahead.len;
|
|
self.lookahead = self.lex.next_command_token();
|
|
Some(ParseEvent::Error($type, len))
|
|
}};
|
|
}
|
|
|
|
use TokenKind::*;
|
|
|
|
if let Some(ev) = self.buffer.take() {
|
|
return Some(ev);
|
|
}
|
|
|
|
match self.stack.last() {
|
|
None => None,
|
|
Some(nt) => match nt {
|
|
NodeKind::Program => match self.lookahead.kind {
|
|
Whitespace => leaf!(Whitespace),
|
|
Newlines => leaf!(Newlines),
|
|
Semicolon => leaf!(Semicolon),
|
|
Comment => leaf!(Comment),
|
|
PlainWord | EscapedChar => call!(Pipeline),
|
|
Pipe => error!(UnexpectedPipe),
|
|
Eof => chain!(None, ret!()), // return silently
|
|
},
|
|
NodeKind::Pipeline => match self.lookahead.kind {
|
|
Whitespace => leaf!(Whitespace),
|
|
Comment => leaf!(Comment),
|
|
Pipe => chain!(leaf!(Pipe), call!(PipelineCont)),
|
|
PlainWord | EscapedChar => call!(Command),
|
|
Newlines | Semicolon | Eof => ret!(),
|
|
},
|
|
NodeKind::PipelineCont => match self.lookahead.kind {
|
|
Whitespace => leaf!(Whitespace),
|
|
Newlines => leaf!(Newlines),
|
|
Comment => leaf!(Comment),
|
|
PlainWord | EscapedChar => tailcall!(Command),
|
|
Semicolon => chain_buf!(chain!(error!(UnexpectedSemicolon), ret!()), ret!()),
|
|
Pipe => chain!(error!(UnexpectedPipe), ret!()),
|
|
Eof => chain!(error!(UnexpectedEof), ret!()),
|
|
},
|
|
NodeKind::Command => match self.lookahead.kind {
|
|
Whitespace => leaf!(Whitespace),
|
|
Comment => leaf!(Comment),
|
|
PlainWord | EscapedChar => call!(Word),
|
|
Newlines | Semicolon | Eof => ret!(),
|
|
Pipe => ret!(),
|
|
},
|
|
NodeKind::Word => match self.lookahead.kind {
|
|
PlainWord => leaf!(PlainWord),
|
|
EscapedChar => leaf!(EscapedChar),
|
|
Comment | Whitespace | Newlines | Semicolon | Pipe | Eof => ret!(),
|
|
},
|
|
_ => unreachable!(),
|
|
},
|
|
}
|
|
}
|
|
}
|