// oysh/crates/oyster_parser/src/parser.rs
//
// 188 lines
// 5.5 KiB
// Rust

use thiserror::Error;
use crate::{lexer::Lexer, Token, TokenKind};
/// Errors that might occur during parsing.
///
/// The parser does not abort on an error: each one is delivered in-band as a
/// [`ParseEvent::Error`] and parsing continues with the following token.
#[derive(Debug, Error)]
pub enum ParseError {
/// A pipe token was found where it is not allowed: at the start of a
/// statement, or directly after another pipe with no command in between.
#[error("unexpected pipe")]
UnexpectedPipe,
/// The input ended after a pipe, before the next command started.
#[error("unexpected end of file")]
UnexpectedEof,
/// A semicolon was found after a pipe, before the next command started.
#[error("unexpected semicolon in the middle of statement")]
UnexpectedSemicolon,
}
/// Type of a node in the parse tree.
///
/// The first group of variants is used for leaves (reported through
/// `ParseEvent::NewLeaf`), the second group for inner nodes (reported through
/// `ParseEvent::StartNode`). [`NodeKind::PipelineCont`] is a parser-internal
/// state only.
///
/// The enum is a plain tag without payload, so it is cheap to copy and can be
/// compared and hashed directly.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum NodeKind {
    // --- Leaf kinds -------------------------------------------------------
    /// Leaf holding a whitespace token.
    Whitespace,
    /// Leaf holding a newlines token.
    Newlines,
    /// Leaf holding a semicolon token.
    Semicolon,
    /// Leaf holding a pipe token.
    Pipe,
    /// Leaf holding a plain (unescaped) part of a word.
    PlainWord,
    /// Leaf holding an escaped character inside a word.
    EscapedChar,
    /// Leaf holding a comment token.
    Comment,

    // --- Inner node kinds -------------------------------------------------
    /// Root of the tree; its start and end are implied rather than emitted.
    Program,
    /// One or more commands joined by pipes.
    Pipeline,
    /// A single command made of words.
    Command,
    /// A word made of adjacent `PlainWord` / `EscapedChar` leaves.
    Word,

    // --- Parser-internal state --------------------------------------------
    /// Read a pipe but didn't start word yet.
    PipelineCont,
}
/// Events required to build a syntax tree.
///
/// A consumer replays the events in order: `StartNode` opens a child of the
/// currently open node, `NewLeaf` and `Error` attach to the currently open
/// node, and `EndNode` closes it.
#[derive(Debug)]
pub enum ParseEvent {
/// A token that is not valid at this position. The `usize` is the `len` of
/// the offending token as reported by the lexer; the token is consumed.
Error(ParseError, usize),
/// Open a new inner node of the given kind.
StartNode(NodeKind),
/// Close the most recently opened inner node.
EndNode,
/// A leaf of the given kind. The `usize` is the `len` of the consumed token
/// as reported by the lexer.
NewLeaf(NodeKind, usize),
}
/// Parser for a given source code string.
///
/// Parsing is decoupled from building the AST and the parse tree: instead of
/// returning a tree directly, the parser is an iterator that yields a linear
/// sequence of [`ParseEvent`]s describing how to construct the trees.
///
/// The `StartNode(Program)` at the beginning and the matching `EndNode` at the
/// end are implied and never emitted.
#[derive(Debug)]
pub struct Parser<'a> {
/// Token source.
lex: Lexer<'a>,
/// The next token, already read from `lex` but not yet consumed.
lookahead: Token,
/// At most one event queued to be returned by the next call to `next`
/// before any further parsing happens.
buffer: Option<ParseEvent>,
/// Grammar rules currently being parsed, innermost last. The top entry
/// decides how `lookahead` is handled.
stack: Vec<NodeKind>,
}
impl Parser<'_> {
    /// Construct a parser over `source`.
    ///
    /// The first token is read from the lexer right away so that `lookahead`
    /// is valid before the first event is requested, and the rule stack is
    /// seeded with `Program` (for which no `StartNode` event is emitted).
    pub fn new(source: &str) -> Parser {
        // Rule stack starts with the implied root rule.
        let mut open_rules = Vec::with_capacity(5);
        open_rules.push(NodeKind::Program);

        // Prime the one-token lookahead.
        let mut lexer = Lexer::new(source);
        let first_token = lexer.next_command_token();

        Parser {
            lex: lexer,
            lookahead: first_token,
            buffer: None,
            stack: open_rules,
        }
    }
}
/// Drives the parser one event at a time.
///
/// Each call looks at the rule on top of `stack` and at the `lookahead` token
/// and produces exactly one [`ParseEvent`], or `None` once the `Program` rule
/// has been popped.
impl Iterator for Parser<'_> {
type Item = ParseEvent;
/// Produce the next parse event, or `None` when parsing has finished.
fn next(&mut self) -> Option<Self::Item> {
// This is essentially a recursive descent parser with an explicit rule
// stack instead of the call stack. The macros below stand in for the
// things a true RD implementation would do with function calls/returns.

// Evaluate `$b` first purely for its side effects (its value is dropped),
// then evaluate to `$a`. Note the order: second argument runs first.
macro_rules! chain {
($a:expr, $b:expr) => {{
$b;
$a
}};
}
// Evaluate `$b` first and queue its value in `self.buffer`, so it is
// returned by the *next* call to `next`; then evaluate to `$a`.
macro_rules! chain_buf {
($a:expr, $b:expr) => {{
self.buffer = $b;
$a
}};
}
// "Call" a rule: push it on the stack and announce the new node.
// The local `use NodeKind::*` lets call sites pass bare variant names.
macro_rules! call {
($rule:expr) => {{
use NodeKind::*;
self.stack.push($rule);
Some(ParseEvent::StartNode($rule))
}};
}
// "Return" from the current rule: pop it and close its node.
macro_rules! ret {
() => {{
self.stack.pop();
Some(ParseEvent::EndNode)
}};
}
// Replace the current rule with `$rule` and announce the new node. No
// `EndNode` is produced for the rule that is replaced.
macro_rules! tailcall {
($rule:expr) => {{
use NodeKind::*;
self.stack.pop();
self.stack.push($rule);
Some(ParseEvent::StartNode($rule))
}};
}
// Consume the lookahead token as a leaf of kind `$type`: remember its
// length, fetch the next token, and emit the leaf.
// NOTE(review): the local `use NodeKind::*` appears to take precedence over
// the function-level `use TokenKind::*` for names that exist in both enums
// (e.g. `Whitespace`) — confirm.
macro_rules! leaf {
($type:expr) => {{
use NodeKind::*;
let len = self.lookahead.len;
self.lookahead = self.lex.next_command_token();
Some(ParseEvent::NewLeaf($type, len))
}};
}
// Consume the lookahead token as an error of kind `$type`: remember its
// length, fetch the next token, and emit the error.
macro_rules! error {
($type:expr) => {{
use ParseError::*;
let len = self.lookahead.len;
self.lookahead = self.lex.next_command_token();
Some(ParseEvent::Error($type, len))
}};
}
// Token kinds are matched by bare name in the patterns below.
use TokenKind::*;
// An event queued by `chain_buf!` on the previous call goes out first.
if let Some(ev) = self.buffer.take() {
return Some(ev);
}
match self.stack.last() {
// `Program` has already been popped: parsing is over.
None => None,
Some(nt) => match nt {
// Top level: trivia and statement separators are leaves, a word starts
// a pipeline, and a pipe with nothing before it is an error.
NodeKind::Program => match self.lookahead.kind {
Whitespace => leaf!(Whitespace),
Newlines => leaf!(Newlines),
Semicolon => leaf!(Semicolon),
Comment => leaf!(Comment),
PlainWord | EscapedChar => call!(Pipeline),
Pipe => error!(UnexpectedPipe),
Eof => chain!(None, ret!()), // pop `Program` and finish; its EndNode is dropped (implied)
},
// Inside a pipeline, between commands.
NodeKind::Pipeline => match self.lookahead.kind {
Whitespace => leaf!(Whitespace),
Comment => leaf!(Comment),
// Emit the pipe leaf and enter `PipelineCont`. The `StartNode` produced by
// `call!` is dropped by `chain!`, so `PipelineCont` exists only on the stack.
Pipe => chain!(leaf!(Pipe), call!(PipelineCont)),
PlainWord | EscapedChar => call!(Command),
// End of the statement: close the pipeline without consuming the token.
Newlines | Semicolon | Eof => ret!(),
},
// A pipe has been read; a command must follow (newlines are allowed first).
NodeKind::PipelineCont => match self.lookahead.kind {
Whitespace => leaf!(Whitespace),
Newlines => leaf!(Newlines),
Comment => leaf!(Comment),
// The expected command starts: swap `PipelineCont` for `Command`.
PlainWord | EscapedChar => tailcall!(Command),
// Order of effects: (1) pop `PipelineCont` and queue an `EndNode` in the
// buffer, (2) pop `Pipeline` (that `EndNode` is dropped; the queued one
// closes the pipeline node on the next call), (3) consume the semicolon
// and emit the error.
Semicolon => chain_buf!(chain!(error!(UnexpectedSemicolon), ret!()), ret!()),
// Pop `PipelineCont` silently, consume the extra pipe and report it;
// parsing resumes in the enclosing `Pipeline`.
Pipe => chain!(error!(UnexpectedPipe), ret!()),
// Pop `PipelineCont` silently and report the premature end of input.
// NOTE(review): `error!` asks the lexer for another token after Eof —
// presumably the lexer keeps returning Eof; confirm.
Eof => chain!(error!(UnexpectedEof), ret!()),
},
// Inside a command: each run of word tokens becomes a `Word` node.
NodeKind::Command => match self.lookahead.kind {
Whitespace => leaf!(Whitespace),
Comment => leaf!(Comment),
PlainWord | EscapedChar => call!(Word),
// The command ends here; the token is left for the enclosing `Pipeline`.
Newlines | Semicolon | Eof => ret!(),
Pipe => ret!(),
},
// Inside a word: adjacent plain/escaped tokens are its leaves; anything
// else ends the word without being consumed.
NodeKind::Word => match self.lookahead.kind {
PlainWord => leaf!(PlainWord),
EscapedChar => leaf!(EscapedChar),
Comment | Whitespace | Newlines | Semicolon | Pipe | Eof => ret!(),
},
// The remaining `NodeKind` variants are leaf kinds; this impl only ever
// pushes `Pipeline`, `PipelineCont`, `Command` and `Word` on the stack.
_ => unreachable!(),
},
}
}
}