diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..935c001
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,4 @@
+/target
+*~*
+*\#*
+.cache
diff --git a/Cargo.lock b/Cargo.lock
new file mode 100644
index 0000000..414dead
--- /dev/null
+++ b/Cargo.lock
@@ -0,0 +1,7 @@
+# This file is automatically @generated by Cargo.
+# It is not intended for manual editing.
+version = 3
+
+[[package]]
+name = "xmk"
+version = "0.1.0"
diff --git a/Cargo.toml b/Cargo.toml
new file mode 100644
index 0000000..58b1ed4
--- /dev/null
+++ b/Cargo.toml
@@ -0,0 +1,8 @@
+[package]
+name = "xmk"
+version = "0.1.0"
+edition = "2021"
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[dependencies]
diff --git a/doc/grammar.bnf b/doc/grammar.bnf
new file mode 100644
index 0000000..63c9720
--- /dev/null
+++ b/doc/grammar.bnf
@@ -0,0 +1,4 @@
+MODULE ::= INSTR*
+INSTR ::= EXPR semicolon
+EXPR ::= BUILTIN
+BUILTIN ::= bool
diff --git a/src/ast/error.rs b/src/ast/error.rs
new file mode 100644
index 0000000..c3eba62
--- /dev/null
+++ b/src/ast/error.rs
@@ -0,0 +1,17 @@
+use std::fmt;
+
+/// Error reported while lexing or parsing, tagged with the source line involved.
+#[derive(Debug)]
+pub struct AstError {
+    pub msg: String,
+    pub line: i32
+}
+
+impl fmt::Display for AstError {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        write!(f, "[ERR] {}: {}", self.line, self.msg)
+    }
+}
+
+impl std::error::Error for AstError {
+}
diff --git a/src/ast/lexer.rs b/src/ast/lexer.rs
new file mode 100644
index 0000000..e1e1886
--- /dev/null
+++ b/src/ast/lexer.rs
@@ -0,0 +1,204 @@
+use crate::ast::node;
+
+/// A successful scan: the matched text and the cursor position just past it.
+struct Token {
+    position: usize,
+    value: String,
+}
+
+/// Hand-written lexer that yields one [`node::Node`] per token.
+///
+/// `cursor` is a byte offset into `text` and always sits on a char boundary,
+/// so it can be compared with `text.len()` and used to slice `text` safely.
+pub struct Lexer {
+    cursor: usize,
+    text: String,
+    line: i32,
+    separators: Vec<char>
+}
+
+impl Lexer {
+    /// Creates a lexer with no input; call [`Lexer::prepare`] before iterating.
+    pub fn new() -> Self {
+        Self {
+            cursor: 0,
+            line: 1,
+            text: String::new(),
+            separators: vec![
+                ';'
+            ]
+        }
+    }
+
+    /// Returns `true` once the cursor has consumed the whole input.
+    pub fn is_done(&self) -> bool {
+        self.cursor >= self.text.len()
+    }
+
+    /// Current 1-based line number, as counted by the newlines skipped so far.
+    pub fn line(&self) -> i32 {
+        self.line
+    }
+
+    /// Loads `text` as the new input and rewinds the cursor and line counter.
+    pub fn prepare(&mut self, text: &str) {
+        self.text = text.to_owned();
+        self.cursor = 0;
+        self.line = 1;
+    }
+
+    /// The part of the input that has not been consumed yet.
+    fn rest(&self) -> &str {
+        &self.text[self.cursor..]
+    }
+
+    /// Whether `c` ends a keyword: whitespace or one of the registered separators.
+    fn is_sep(&self, c: char) -> bool {
+        c.is_whitespace() || self.separators.contains(&c)
+    }
+
+    /// Matches the literal `text` at the cursor, whatever follows it.
+    fn scan_text(&self, text: &str) -> Option<Token> {
+        if !self.rest().starts_with(text) {
+            return None;
+        }
+
+        Some(Token {
+            position: self.cursor + text.len(),
+            value: text.to_owned()
+        })
+    }
+
+    /// Matches `text` at the cursor only when it is followed by a separator
+    /// or by the end of the input, so that `truea` is not read as `true`.
+    fn scan_keyword(&self, text: &str) -> Option<Token> {
+        let token = self.scan_text(text)?;
+
+        match self.text[token.position..].chars().next() {
+            Some(c) if !self.is_sep(c) => None,
+            _ => Some(token)
+        }
+    }
+
+    /// Advances the cursor past any whitespace, counting the newlines crossed.
+    fn skip_spaces(&mut self) {
+        while let Some(c) = self.rest().chars().next() {
+            if !c.is_whitespace() { break; }
+
+            if c == '\n' {
+                self.line += 1;
+            }
+
+            // Step by the encoded width so the cursor stays on a char boundary.
+            self.cursor += c.len_utf8();
+        }
+    }
+}
+
+impl Iterator for Lexer {
+    type Item = node::Node;
+
+    /// Skips leading whitespace and returns the next token, or `None` at the
+    /// end of the input or in front of a symbol the lexer does not know.
+    fn next(&mut self) -> Option<Self::Item> {
+        self.skip_spaces();
+
+        if let Some(token) = self.scan_keyword("true") {
+            self.cursor = token.position;
+            return Some(node::Node::Bool(token.value == "true"));
+        }
+
+        if let Some(token) = self.scan_keyword("false") {
+            self.cursor = token.position;
+            return Some(node::Node::Bool(token.value == "true"));
+        }
+
+        if let Some(token) = self.scan_text(";") {
+            self.cursor = token.position;
+            return Some(node::Node::Semicolon);
+        }
+
+        None
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use super::*;
+    use crate::ast::error;
+
+    /// Lexes `text` completely, or reports the first symbol the lexer stopped on.
+    fn lexer_run(lexer: &mut Lexer, text: &str) -> Result<Vec<node::Node>, error::AstError> {
+        lexer.prepare(text);
+        let result = lexer.collect();
+
+        if !lexer.is_done() {
+            let val: String = lexer
+                .rest()
+                .chars()
+                .take_while(|c| !c.is_whitespace())
+                .collect();
+
+            return Err(error::AstError {
+                msg: format!("unknown symbol <{}>", val),
+                line: lexer.line
+            });
+        }
+
+        Ok(result)
+    }
+
+    #[test]
+    fn test_booleans() {
+        let mut lex = Lexer::new();
+        let res = lexer_run(&mut lex, "true false").unwrap();
+        assert_eq!(2, res.len());
+        assert_eq!("Bool(true)", format!("{:?}", res.get(0).unwrap()));
+        assert_eq!("Bool(false)", format!("{:?}", res.get(1).unwrap()));
+    }
+
+    #[test]
+    fn test_keyword_and_text() {
+        let mut lex = Lexer::new();
+        let res = lexer_run(&mut lex, "true;").unwrap();
+        assert_eq!(2, res.len());
+        assert_eq!("Bool(true)", format!("{:?}", res.get(0).unwrap()));
+        assert_eq!("Semicolon", format!("{:?}", res.get(1).unwrap()));
+    }
+
+    #[test]
+    fn test_wrong_ident() {
+        let mut lex = Lexer::new();
+        let res = lexer_run(&mut lex, " truea ");
+        assert!(res.is_err());
+        let mut lex = Lexer::new();
+        let res = lexer_run(&mut lex, " atrue ");
+        assert!(res.is_err());
+    }
+
+    #[test]
+    fn test_error() {
+        let mut lex = Lexer::new();
+        let res = lexer_run(&mut lex, " § ");
+        assert!(res.is_err());
+    }
+
+    #[test]
+    fn test_punctuation() -> Result<(), error::AstError> {
+        let mut lex = Lexer::new();
+        let res = lexer_run(&mut lex, " ;; ")?;
+        assert_eq!(2, res.len());
+        assert_eq!("Semicolon", format!("{:?}", res.get(0).unwrap()));
+        assert_eq!("Semicolon", format!("{:?}", res.get(1).unwrap()));
+        Ok(())
+    }
+
+    #[test]
+    fn test_multibyte_whitespace() {
+        let mut lex = Lexer::new();
+        let res = lexer_run(&mut lex, "\u{2003}true;\u{a0}").unwrap();
+        assert_eq!(2, res.len());
+        assert_eq!("Bool(true)", format!("{:?}", res.get(0).unwrap()));
+        assert_eq!("Semicolon", format!("{:?}", res.get(1).unwrap()));
+    }
+}
diff --git a/src/ast/mod.rs b/src/ast/mod.rs
new file mode 100644
index 0000000..c10fdc1
--- /dev/null
+++ b/src/ast/mod.rs
@@ -0,0 +1,4 @@
+pub mod error;
+pub mod node;
+pub mod lexer;
+pub mod parser;
diff --git a/src/ast/node.rs b/src/ast/node.rs
new file mode 100644
index 0000000..f3a43ec
--- /dev/null
+++ b/src/ast/node.rs
@@ -0,0 +1,13 @@
+/// Tokens and syntax-tree nodes share this single type.
+#[derive(Debug)]
+#[derive(PartialEq)]
+pub enum Node {
+    /// A whole source file: module name and its instructions, in order.
+    Module(String, Vec<Node>),
+    /// A `true` / `false` literal.
+    Bool(bool),
+    /// The `;` instruction terminator (produced by the lexer only).
+    Semicolon
+}
+
+
diff --git a/src/ast/parser.rs b/src/ast/parser.rs
new file mode 100644
index 0000000..faab2eb
--- /dev/null
+++ b/src/ast/parser.rs
@@ -0,0 +1,161 @@
+use crate::ast::{
+    node,
+    lexer,
+    error
+};
+
+/// Recursive-descent parser for the grammar in `doc/grammar.bnf`.
+pub struct Parser {
+    module_name: String,
+    lexer: lexer::Lexer
+}
+
+/// `Ok(None)` means "nothing left to parse here", not a failure.
+type ParseResult = Result<Option<node::Node>, error::AstError>;
+
+impl Parser {
+    /// Builds a parser reading from `lex`, which must already be prepared
+    /// with the source text; `module_name` names the resulting module node.
+    pub fn new(module_name: &str, lex: lexer::Lexer) -> Self {
+        Self {
+            module_name: module_name.to_owned(),
+            lexer: lex
+        }
+    }
+
+    /// Parses the whole input and returns its `Node::Module` root.
+    pub fn run(&mut self) -> ParseResult {
+        self.parse_module()
+    }
+
+    /// MODULE ::= INSTR*
+    fn parse_module(&mut self) -> ParseResult {
+        let mut children: Vec<node::Node> = vec![];
+
+        while let Some(instr) = self.parse_instr()? {
+            children.push(instr);
+        }
+
+        // The lexer stops on symbols it does not know, leaving input unread.
+        if !self.lexer.is_done() {
+            return Err(error::AstError {
+                msg: String::from("premature end"),
+                line: self.lexer.line()
+            });
+        }
+
+        let n = node::Node::Module(self.module_name.clone(), children);
+        Ok(Some(n))
+    }
+
+    /// INSTR ::= EXPR semicolon
+    fn parse_instr(&mut self) -> ParseResult {
+        let expr = match self.parse_expr()? {
+            Some(expr) => expr,
+            None => return Ok(None)
+        };
+
+        // An expression that is not closed by `;` is an error even at the end
+        // of the input; it must not be dropped silently.
+        match self.lexer.next() {
+            Some(node::Node::Semicolon) => Ok(Some(expr)),
+            _ => Err(error::AstError {
+                msg: String::from("missing semicolon"),
+                line: self.lexer.line()
+            })
+        }
+    }
+
+    /// EXPR ::= BUILTIN
+    fn parse_expr(&mut self) -> ParseResult {
+        self.parse_builtin()
+    }
+
+    /// BUILTIN ::= bool
+    fn parse_builtin(&mut self) -> ParseResult {
+        match self.lexer.next() {
+            Some(node::Node::Bool(value))
+                => Ok(Some(node::Node::Bool(value))),
+
+            Some(other) => {
+                Err(error::AstError {
+                    msg: format!("unexpected {:?}", other),
+                    line: self.lexer.line()
+                })
+            }
+
+            None => Ok(None)
+        }
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use super::*;
+    use crate::ast::node::Node;
+
+    type TestResult = Result<(), String>;
+
+    fn test_parser(oracle: node::Node, source: &str) -> TestResult {
+        let mut lex = lexer::Lexer::new();
+        lex.prepare(source);
+        let mut parser = Parser::new("mod", lex);
+
+        match parser.run() {
+            Ok(Some(root)) => {
+                if root == oracle {
+                    Ok(())
+                } else {
+                    Err(format!("expected: {:?} != got: {:?}",
+                                oracle, root))
+                }
+            },
+            Err(err) => Err(format!("{}: {}", err.line, err.msg)),
+            Ok(None) => Err(format!("{} doesnt produce anything", source))
+        }
+    }
+
+    #[test]
+    fn premature_end() -> TestResult {
+        let mut lex = lexer::Lexer::new();
+        lex.prepare("coucou");
+        let mut parser = Parser::new("mod", lex);
+
+        match parser.run() {
+            Err(_) => Ok(()),
+            _ => Err(String::from("premature end not detected"))
+        }
+    }
+
+    #[test]
+    fn missing_semicolon() -> TestResult {
+        let mut lex = lexer::Lexer::new();
+        lex.prepare(" true; false ");
+        let mut parser = Parser::new("mod", lex);
+
+        match parser.run() {
+            Err(err) if err.msg == "missing semicolon" => Ok(()),
+            other => Err(format!("missing semicolon not detected: {:?}", other))
+        }
+    }
+
+    #[test]
+    fn simple_booleans() -> TestResult {
+        test_parser(
+            Node::Module("mod".to_owned(), vec![
+                Node::Bool(true)
+            ]),
+            " true; "
+        )?;
+
+        test_parser(
+            Node::Module("mod".to_owned(), vec![
+                Node::Bool(true),
+                Node::Bool(false)
+            ]),
+            " true; false; "
+        )?;
+
+        Ok(())
+    }
+}
diff --git a/src/eval/error.rs b/src/eval/error.rs
new file mode 100644
index 0000000..4884955
--- /dev/null
+++ b/src/eval/error.rs
@@ -0,0 +1,16 @@
+use std::fmt;
+
+/// Error reported while evaluating a syntax tree.
+#[derive(Debug)]
+pub struct EvalError {
+    pub msg: String,
+    pub line: i32
+}
+
+impl fmt::Display for EvalError {
+    fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
+        write!(formatter, "[ERR]:{} {}", self.line, self.msg)
+    }
+}
+
+impl std::error::Error for EvalError {}
diff --git a/src/eval/evaluator.rs b/src/eval/evaluator.rs
new file mode 100644
index 0000000..eb94a49
--- /dev/null
+++ b/src/eval/evaluator.rs
@@ -0,0 +1,87 @@
+use crate::{
+    ast::node::Node,
+    eval::{
+        value::Value,
+        error::EvalError
+    }
+};
+
+/// Tree-walking evaluator; it carries no state yet.
+pub struct Evaluator {
+
+}
+
+/// `Ok(None)` means the node evaluated without producing a value.
+type EvalResult = Result<Option<Value>, EvalError>;
+
+impl Evaluator {
+    /// Creates an evaluator.
+    pub fn new() -> Self {
+        Self {
+        }
+    }
+
+    /// Evaluates `root` and returns its value, if it has one.
+    ///
+    /// A module evaluates its children in order and yields the result of
+    /// the last one (`None` for an empty module).
+    pub fn run(&mut self, root: &Node) -> EvalResult {
+        match root {
+            Node::Module(_, nodes) => {
+                let mut res = None;
+
+                for node in nodes {
+                    res = self.run(node)?;
+                }
+
+                Ok(res)
+            },
+            Node::Bool(value) => Ok(Some(Value::Bool(*value))),
+            Node::Semicolon => Ok(None)
+        }
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use super::*;
+    use crate::ast::{
+        lexer::Lexer,
+        parser::Parser
+    };
+
+    type TestResult = Result<(), Box<dyn std::error::Error>>;
+
+    /// Parses and evaluates `source`, failing unless it yields exactly `value`.
+    fn test_eval_value(value: Value, source: &str) -> TestResult {
+        let mut lex = Lexer::new();
+        lex.prepare(source);
+
+        let mut parser = Parser::new("mod", lex);
+        let mut evaluator = Evaluator::new();
+
+        let result = match parser.run()? {
+            Some(root) => evaluator.run(&root)?,
+            None => None
+        };
+
+        // A run that yields nothing is a failure too, not a silent pass.
+        let expected = Some(value);
+        if result != expected {
+            return Err(Box::new(EvalError {
+                msg: format!("expected: {:?}, got: {:?}", expected, result),
+                line: 0
+            }));
+        }
+
+        Ok(())
+    }
+
+    #[test]
+    fn simple_boolean() -> TestResult {
+        test_eval_value(Value::Bool(true), " true; false; true; ")?;
+        test_eval_value(Value::Bool(false), " true; false; ")?;
+
+        Ok(())
+    }
+}
diff --git a/src/eval/mod.rs b/src/eval/mod.rs
new file mode 100644
index 0000000..489ede0
--- /dev/null
+++ b/src/eval/mod.rs
@@ -0,0 +1,3 @@
+pub mod value;
+pub mod evaluator;
+pub mod error;
diff --git a/src/eval/value.rs b/src/eval/value.rs
new file mode 100644
index 0000000..8d74361
--- /dev/null
+++ b/src/eval/value.rs
@@ -0,0 +1,7 @@
+/// Runtime value produced by the evaluator.
+#[derive(PartialEq)]
+#[derive(Debug)]
+pub enum Value {
+    Bool(bool)
+}
+
diff --git a/src/main.rs b/src/main.rs
new file mode 100644
index 0000000..8506845
--- /dev/null
+++ b/src/main.rs
@@ -0,0 +1,29 @@
+use std::fs;
+
+mod ast;
+mod eval;
+
+/// Parses and evaluates the file named by the first command-line argument,
+/// then prints the value it yields, if any. Does nothing without an argument.
+fn main() -> Result<(), Box<dyn std::error::Error>> {
+    let args: Vec<String> = std::env::args().collect();
+
+    if let Some(path) = args.get(1) {
+        let content = fs::read_to_string(path)?;
+
+        let mut lex = ast::lexer::Lexer::new();
+        lex.prepare(content.as_str());
+
+        let mut parser = ast::parser::Parser::new(path, lex);
+
+        if let Some(root) = parser.run()? {
+            let mut evaluator = eval::evaluator::Evaluator::new();
+
+            if let Some(val) = evaluator.run(&root)? {
+                println!("{:?}", val);
+            }
+        }
+    }
+
+    Ok(())
+}