summaryrefslogtreecommitdiff
path: root/src/lang/tokenizer.rs
diff options
context:
space:
mode:
author Graham Northup <grissess@nexusg.org> 2017-10-03 14:50:18 -0400
committer Graham Northup <grissess@nexusg.org> 2017-10-03 14:50:18 -0400
commit 22dd5bc5862e91330c8da0d5f141998cdbe546fb (patch)
tree 253cb9f6b080caaad18b2df9f63004266bf73c27 /src/lang/tokenizer.rs
parent 145e2771c0d1ad30748da6e6ef1fabbd4cc2478c (diff)
Finished clientside parsing, starting graphics
Diffstat (limited to 'src/lang/tokenizer.rs')
-rw-r--r-- src/lang/tokenizer.rs | 121
1 file changed, 112 insertions, 9 deletions
diff --git a/src/lang/tokenizer.rs b/src/lang/tokenizer.rs
index 74b304d..1d62f3e 100644
--- a/src/lang/tokenizer.rs
+++ b/src/lang/tokenizer.rs
@@ -1,6 +1,7 @@
use std::collections::HashMap;
use std::error::Error;
-use std::fmt;
+use std::{fmt, io, fs};
+use std::io::Read;
use super::*;
use unicode_xid::UnicodeXID;
@@ -13,6 +14,7 @@ pub struct Lexemes {
esc_oct: char,
com_outer: char,
com_inner: char,
+ include_delim: char,
escapes: HashMap<char, char>
}
@@ -27,6 +29,7 @@ impl Default for Lexemes {
esc_oct: 'o',
com_outer: '/',
com_inner: '*',
+ include_delim: '#',
escapes: HashMap::new(),
};
@@ -44,6 +47,7 @@ impl Default for Lexemes {
pub enum Location {
InString,
InStringEscape,
+ InInclude,
}
#[derive(Debug)]
@@ -64,6 +68,8 @@ pub enum ErrorKind {
BadEscapeValue(EscapeKind, String, Option<Box<Error>>),
BadNumericLiteral(NumericKind, String, Option<Box<Error>>),
UnknownChar(char),
+ IncludeError(io::Error),
+ TooManyRecursions(usize),
}
#[derive(Debug)]
@@ -83,6 +89,7 @@ impl ErrorType {
ErrorKind::UnexpectedEOF(ref loc) => format!("Unexpected EOF {}", match *loc {
Location::InString => "in string constant",
Location::InStringEscape => "in string escape",
+ Location::InInclude => "in include",
}),
ErrorKind::BadEscapeValue(ref kind, ref val, ref err) => format!("Bad {} escape {}: {:?}", match *kind {
EscapeKind::Hexadecimal => "hexadecimal",
@@ -93,6 +100,8 @@ impl ErrorType {
NumericKind::Float => "floating point",
}, val, err),
ErrorKind::UnknownChar(c) => format!("Unknown character {}", c),
+ ErrorKind::IncludeError(ref e) => format!("Error including file: {:?}", e),
+ ErrorKind::TooManyRecursions(n) => format!("Include recursed too many times ({})", n),
};
ret
@@ -117,7 +126,7 @@ impl Error for ErrorType {
Some(ref err) => Some(&**err),
None => None,
},
- ErrorKind::UnexpectedEOF(_) | ErrorKind::UnknownChar(_) => None,
+ _ => None,
}
}
}
@@ -134,16 +143,56 @@ fn char_in(s: &str, c: char) -> bool {
s.chars().find(|&x| x == c).map_or(false, |_| true)
}
+pub struct ResumableChars {
+ string: String,
+ pos: usize,
+}
+
+impl ResumableChars {
+ pub fn new(s: String) -> ResumableChars {
+ ResumableChars {
+ string: s,
+ pos: 0,
+ }
+ }
+}
+
+impl Iterator for ResumableChars {
+ type Item = char;
+
+ fn next(&mut self) -> Option<char> {
+ if self.pos >= self.string.len() {
+ None
+ } else {
+ let mut iter = self.string[self.pos..].char_indices();
+ match iter.next() {
+ Some((pos, ch)) => {
+ self.pos += match iter.next() {
+ Some((pos, _)) => pos,
+ None => self.string.len(),
+ };
+ Some(ch)
+ },
+ None => None,
+ }
+ }
+ }
+}
+
pub struct Tokenizer<T: Iterator<Item=char>> {
reader: T,
+ reader_stack: Vec<ResumableChars>,
pushback: Option<char>,
lexemes: Lexemes,
}
impl<T: Iterator<Item=char>> Tokenizer<T> {
+ const MAX_INCLUDE_RECURSIONS: usize = 256;
+
pub fn new(reader: T) -> Tokenizer<T> {
Tokenizer {
reader: reader,
+ reader_stack: Vec::new(),
pushback: None,
lexemes: Default::default(),
}
@@ -159,23 +208,49 @@ impl<T: Iterator<Item=char>> Tokenizer<T> {
}
}
+ pub fn push_reader(&mut self, rc: ResumableChars) -> Result<(), ErrorType> {
+ if self.reader_stack.len() > Self::MAX_INCLUDE_RECURSIONS {
+ Err(ErrorType::new(ErrorKind::TooManyRecursions(self.reader_stack.len())))
+ } else {
+ self.reader_stack.push(rc);
+ Ok(())
+ }
+ }
+
fn next_char(&mut self) -> Option<char> {
match self.pushback {
Some(c) => {
self.pushback = None;
Some(c)
},
- None => self.reader.next(),
+ None => {
+ let mut ret = None;
+ let mut produced_idx: usize = 0;
+ let len = self.reader_stack.len();
+
+ for (idx, rc) in self.reader_stack.iter_mut().enumerate().rev() {
+ match rc.next() {
+ Some(c) => {
+ ret = Some(c);
+ produced_idx = idx;
+ break;
+ },
+ None => {},
+ }
+ }
+
+ match ret {
+ Some(c) => {
+ self.reader_stack.truncate(produced_idx + 1);
+ Some(c)
+ },
+ None => self.reader.next(),
+ }
+ },
}
}
pub fn next_token(&mut self) -> Result<Token, ErrorType> {
- let res = self._next_token();
- eprintln!("next_token: {:?}", res);
- res
- }
-
- fn _next_token(&mut self) -> Result<Token, ErrorType> {
let mut c = self.next_char();
if c == None {
return Ok(Token::EOF);
@@ -216,6 +291,34 @@ impl<T: Iterator<Item=char>> Tokenizer<T> {
}
}
+ /* Inclusion */
+ if cc == self.lexemes.include_delim {
+ let mut buffer = String::new();
+
+ loop {
+ let nc = self.next_char();
+ if nc == None {
+ return Err(ErrorType::new(ErrorKind::UnexpectedEOF(Location::InInclude)));
+ }
+ let ncc = nc.unwrap();
+
+ if ncc == self.lexemes.include_delim {
+ break;
+ } else {
+ buffer.push(ncc);
+ }
+ }
+
+ let mut f = match fs::File::open(buffer) {
+ Err(err) => return Err(ErrorType::new(ErrorKind::IncludeError(err))),
+ Ok(f) => f,
+ };
+ let mut contents = String::new();
+ f.read_to_string(&mut contents);
+ self.push_reader(ResumableChars::new(contents))?;
+ return self.next_token()
+ }
+
/* Strings */
if char_in(&self.lexemes.string_delim, cc) {
let mut buffer = String::new();