summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Graham Northup <grissess@nexusg.org> 2017-09-21 16:31:40 -0400
committer Graham Northup <grissess@nexusg.org> 2017-09-21 16:31:40 -0400
commit 3d370b9a980d88f884ddd87b62bc785c3b963e1d (patch)
tree 2f35167101c09891ed48bc8cbf3603e993ec7341
parent dcfc4e82386f41bd36c3b102512bd225fc5331b6 (diff)
Tokenizer for DSL
-rw-r--r-- src/lang/mod.rs 12
-rw-r--r-- src/lang/tokenizer.rs 395
-rw-r--r-- src/lib.rs 2
-rw-r--r-- src/proto.rs 46
4 files changed, 449 insertions, 6 deletions
diff --git a/src/lang/mod.rs b/src/lang/mod.rs
new file mode 100644
index 0000000..8256f34
--- /dev/null
+++ b/src/lang/mod.rs
@@ -0,0 +1,12 @@
+pub mod tokenizer;
+pub use self::tokenizer::Tokenizer;
+
+/// A lexical token produced by the DSL tokenizer.
+#[derive(Debug, Clone, PartialEq)]
+pub enum Token {
+    /// An identifier.
+    Ident(String),
+    /// An integer literal.
+    Integer(isize),
+    /// A floating-point literal.
+    Float(f32),
+    /// A single character that does not start any other kind of token.
+    Oper(char),
+    /// A string literal with its escape sequences already decoded.
+    String(String),
+    /// End of input.
+    EOF,
+}
+
diff --git a/src/lang/tokenizer.rs b/src/lang/tokenizer.rs
new file mode 100644
index 0000000..d1b34e0
--- /dev/null
+++ b/src/lang/tokenizer.rs
@@ -0,0 +1,395 @@
+use std::collections::HashMap;
+use std::error::Error;
+use std::fmt;
+use super::*;
+
+/// The characters the tokenizer treats specially.
+#[derive(Debug, Clone)]
+pub struct Lexemes {
+    /// Character that turns a numeric literal into a floating-point one.
+    radix_point: char,
+    /// Characters accepted as an exponent marker inside a floating-point literal.
+    exponent_chars: String,
+    /// Characters that open a string; the same character closes it.
+    string_delim: String,
+    /// Character that introduces an escape sequence inside a string.
+    esc_intro: char,
+    /// Selects a hexadecimal escape after `esc_intro`, or a hexadecimal literal after a leading `0`.
+    esc_hex: char,
+    /// Selects an octal escape after `esc_intro`, or an octal literal after a leading `0`.
+    esc_oct: char,
+    /// First character of a comment opener and last character of a comment closer.
+    com_outer: char,
+    /// Second character of a comment opener and first character of a comment closer.
+    com_inner: char,
+    /// Single-character escapes: the character after `esc_intro` mapped to its replacement.
+    escapes: HashMap<char, char>
+}
+
+impl Default for Lexemes {
+    /// Builds the default table: `.` radix point, `e`/`E` exponents, `'` and `"` string
+    /// delimiters, `\` escapes with `x`/`o` numeric forms, and `/* ... */` comments.
+    fn default() -> Lexemes {
+        // (character following the escape intro, character it stands for)
+        let simple_escapes = [('n', '\n'), ('t', '\t'), ('r', '\r'), ('"', '"'), ('\'', '\'')];
+
+        Lexemes {
+            radix_point: '.',
+            exponent_chars: String::from("eE"),
+            string_delim: String::from("'\""),
+            esc_intro: '\\',
+            esc_hex: 'x',
+            esc_oct: 'o',
+            com_outer: '/',
+            com_inner: '*',
+            escapes: simple_escapes.iter().cloned().collect(),
+        }
+    }
+}
+
+/// Where the tokenizer was when the input ended unexpectedly.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum Location {
+    /// Inside a string literal, before its closing delimiter.
+    InString,
+    /// Inside an escape sequence within a string literal.
+    InStringEscape,
+}
+
+/// The numeric base of a string escape that failed to decode.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum EscapeKind {
+    /// A base-16 escape.
+    Hexadecimal,
+    /// A base-8 escape.
+    Octal,
+}
+
+/// The kind of numeric literal that failed to parse.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum NumericKind {
+    /// A literal without a radix point.
+    Integer,
+    /// A literal containing a radix point.
+    Float,
+}
+
+/// The category of a tokenizer failure, together with its details.
+#[derive(Debug)]
+pub enum ErrorKind {
+ /// The input ended while the tokenizer was still inside the given construct.
+ UnexpectedEOF(Location),
+ /// A numeric string escape could not be decoded: its base, the digits that were read, and
+ /// the parse error if there was one (`None` when the digits parsed but the value is not a
+ /// valid `char`).
+ BadEscapeValue(EscapeKind, String, Option<Box<Error>>),
+ /// A numeric literal could not be parsed: its kind, the text that was read, and the
+ /// underlying parse error.
+ BadNumericLiteral(NumericKind, String, Option<Box<Error>>),
+ /// A character the tokenizer does not recognise.
+ UnknownChar(char),
+}
+
+/// A tokenizer error: a structured `kind` plus a human-readable message.
+#[derive(Debug)]
+pub struct ErrorType {
+ /// What went wrong.
+ pub kind: ErrorKind,
+ // Message returned by `description()` and written by `Display`.
+ desc: String,
+}
+
+impl ErrorType {
+    /// Creates an error whose message is generated from `kind`.
+    pub fn new(kind: ErrorKind) -> ErrorType {
+        // The borrows taken by `ref` end with this statement, so `kind` can be moved below.
+        let message = match kind {
+            ErrorKind::UnexpectedEOF(ref place) => {
+                let where_text = match *place {
+                    Location::InString => "in string constant",
+                    Location::InStringEscape => "in string escape",
+                };
+                format!("Unexpected EOF {}", where_text)
+            },
+            ErrorKind::BadEscapeValue(ref base, ref text, ref source) => {
+                let base_text = match *base {
+                    EscapeKind::Hexadecimal => "hexadecimal",
+                    EscapeKind::Octal => "octal",
+                };
+                format!("Bad {} escape {}: {:?}", base_text, text, source)
+            },
+            ErrorKind::BadNumericLiteral(ref numeric, ref text, ref source) => {
+                let numeric_text = match *numeric {
+                    NumericKind::Integer => "integer",
+                    NumericKind::Float => "floating point",
+                };
+                format!("Bad {} literal {}: {:?}", numeric_text, text, source)
+            },
+            ErrorKind::UnknownChar(ch) => format!("Unknown character {}", ch),
+        };
+
+        ErrorType::with_description(kind, message)
+    }
+
+    /// Creates an error with a caller-supplied message instead of a generated one.
+    pub fn with_description(kind: ErrorKind, description: String) -> ErrorType {
+        ErrorType {
+            kind: kind,
+            desc: description,
+        }
+    }
+}
+
+impl Error for ErrorType {
+    /// Returns the message stored when this error was built.
+    fn description(&self) -> &str {
+        &self.desc
+    }
+
+    /// Returns the parse error carried by a bad escape or bad literal, if there is one.
+    fn cause(&self) -> Option<&Error> {
+        match self.kind {
+            ErrorKind::BadEscapeValue(_, _, Some(ref inner)) |
+            ErrorKind::BadNumericLiteral(_, _, Some(ref inner)) => Some(&**inner),
+            ErrorKind::BadEscapeValue(_, _, None) |
+            ErrorKind::BadNumericLiteral(_, _, None) |
+            ErrorKind::UnexpectedEOF(_) |
+            ErrorKind::UnknownChar(_) => None,
+        }
+    }
+}
+
+impl fmt::Display for ErrorType {
+    /// Writes the error's message.
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        f.write_str(&self.desc)
+    }
+}
+
+/// Returns whether `c` is one of the characters of `s`.
+///
+/// NB: linear in the size of the set. This is practically fine for very small sets, but
+/// shouldn't be used otherwise.
+fn char_in(s: &str, c: char) -> bool {
+    s.chars().any(|x| x == c)
+}
+
+/// Splits a stream of characters into `Token`s.
+pub struct Tokenizer<T: Iterator<Item=char>> {
+ // Source of input characters.
+ reader: T,
+ // At most one character that was read too far and will be returned by the next read.
+ pushback: Option<char>,
+ // The special characters this tokenizer recognises.
+ lexemes: Lexemes,
+}
+
+impl<T: Iterator<Item=char>> Tokenizer<T> {
+    /// Creates a tokenizer reading from `reader` with the default lexemes.
+    pub fn new(reader: T) -> Tokenizer<T> {
+        Tokenizer {
+            reader: reader,
+            pushback: None,
+            lexemes: Default::default(),
+        }
+    }
+
+    /// Stores `c` so the next `next_char` call returns it.
+    ///
+    /// Only one character can be held; returns `false` (and drops `c`) if the slot is
+    /// already occupied. Every caller below pushes back the character it has just read,
+    /// so the slot is always empty at that point.
+    fn push_back(&mut self, c: char) -> bool {
+        if self.pushback.is_some() {
+            return false;
+        }
+        self.pushback = Some(c);
+        true
+    }
+
+    /// Returns the pushed-back character if there is one, otherwise the next one from the reader.
+    fn next_char(&mut self) -> Option<char> {
+        match self.pushback.take() {
+            Some(c) => Some(c),
+            None => self.reader.next(),
+        }
+    }
+
+    /// Reads the next token.
+    ///
+    /// Whitespace and block comments are skipped. Returns `Token::EOF` when the input is
+    /// exhausted, including when it ends inside a comment. Returns an error for an
+    /// unterminated string or escape, an undecodable escape value, or an unparsable
+    /// numeric literal.
+    fn next_token(&mut self) -> Result<Token, ErrorType> {
+        // A loop rather than recursion, so a long run of comments cannot grow the stack.
+        loop {
+            let cc = match self.skip_whitespace() {
+                Some(c) => c,
+                None => return Ok(Token::EOF),
+            };
+
+            /* Comments */
+            if cc == self.lexemes.com_outer {
+                match self.next_char() {
+                    None => return Ok(Token::Oper(cc)),
+                    Some(nc) if nc == self.lexemes.com_inner => {
+                        if self.skip_comment() {
+                            continue;
+                        }
+                        return Ok(Token::EOF);
+                    },
+                    Some(nc) => {
+                        self.push_back(nc);
+                        return Ok(Token::Oper(cc));
+                    },
+                }
+            }
+
+            /* Strings */
+            if char_in(&self.lexemes.string_delim, cc) {
+                return self.read_string(cc);
+            }
+
+            /* Numeric constants */
+            if cc.is_digit(10) {
+                return self.read_number(cc);
+            }
+
+            /* Identifiers */
+            if cc.is_xid_start() {
+                return Ok(Token::Ident(self.read_ident(cc)));
+            }
+
+            /* Everything else */
+            return Ok(Token::Oper(cc));
+        }
+    }
+
+    /// Returns the first non-whitespace character, or `None` at end of input.
+    fn skip_whitespace(&mut self) -> Option<char> {
+        loop {
+            match self.next_char() {
+                Some(c) if c.is_whitespace() => continue,
+                other => return other,
+            }
+        }
+    }
+
+    /// Consumes input up to and including the comment closer (`com_inner` followed by
+    /// `com_outer`). Returns `false` if the input ends first.
+    fn skip_comment(&mut self) -> bool {
+        // True while the previous character was `com_inner`. Tracking this as state (rather
+        // than reading a second character and discarding it) lets `**/` close the comment.
+        let mut after_inner = false;
+        loop {
+            let c = match self.next_char() {
+                Some(c) => c,
+                None => return false,
+            };
+            if after_inner && c == self.lexemes.com_outer {
+                return true;
+            }
+            after_inner = c == self.lexemes.com_inner;
+        }
+    }
+
+    /// Reads the remainder of a string literal opened by `delim`, decoding escapes.
+    fn read_string(&mut self, delim: char) -> Result<Token, ErrorType> {
+        let mut buffer = String::new();
+
+        loop {
+            let c = match self.next_char() {
+                Some(c) => c,
+                None => return Err(ErrorType::new(ErrorKind::UnexpectedEOF(Location::InString))),
+            };
+
+            if c == self.lexemes.esc_intro {
+                let ec = match self.next_char() {
+                    Some(ec) => ec,
+                    None => return Err(ErrorType::new(ErrorKind::UnexpectedEOF(Location::InStringEscape))),
+                };
+
+                let decoded = if ec == self.lexemes.esc_hex {
+                    self.read_escape_value(EscapeKind::Hexadecimal)?
+                } else if ec == self.lexemes.esc_oct {
+                    self.read_escape_value(EscapeKind::Octal)?
+                } else {
+                    // Unknown escapes stand for the escaped character itself.
+                    *self.lexemes.escapes.get(&ec).unwrap_or(&ec)
+                };
+                buffer.push(decoded);
+            } else if c == delim {
+                return Ok(Token::String(buffer));
+            } else {
+                buffer.push(c);
+            }
+        }
+    }
+
+    /// Reads the digits of a numeric string escape and converts them to a character.
+    ///
+    /// The first character that is not a digit of the escape's base is pushed back.
+    fn read_escape_value(&mut self, kind: EscapeKind) -> Result<char, ErrorType> {
+        let radix = match kind {
+            EscapeKind::Hexadecimal => 16,
+            EscapeKind::Octal => 8,
+        };
+
+        let mut digits = String::new();
+        loop {
+            match self.next_char() {
+                None => return Err(ErrorType::new(ErrorKind::UnexpectedEOF(Location::InStringEscape))),
+                Some(c) if c.is_digit(radix) => digits.push(c),
+                Some(c) => {
+                    self.push_back(c);
+                    break;
+                },
+            }
+        }
+
+        match u32::from_str_radix(&digits, radix) {
+            Err(err) => Err(ErrorType::new(ErrorKind::BadEscapeValue(kind, digits, Some(Box::new(err))))),
+            Ok(code) => match ::std::char::from_u32(code) {
+                Some(ch) => Ok(ch),
+                None => Err(ErrorType::new(ErrorKind::BadEscapeValue(kind, digits, None))),
+            },
+        }
+    }
+
+    /// Reads the remainder of a numeric literal whose first digit is `first`.
+    ///
+    /// A leading `0` followed by `esc_hex` or `esc_oct` selects base 16 or 8. A radix point
+    /// is accepted in decimal literals only and makes the literal floating-point.
+    fn read_number(&mut self, first: char) -> Result<Token, ErrorType> {
+        let mut radix = 10;
+        let mut buffer = String::new();
+        let mut floating = false;
+
+        if first == '0' {
+            match self.next_char() {
+                None => return Ok(Token::Integer(0)),
+                Some(c) if c == self.lexemes.esc_hex => radix = 16,
+                Some(c) if c == self.lexemes.esc_oct => radix = 8,
+                Some(c) => {
+                    // Not a base prefix: keep the zero and let the loop below classify `c`,
+                    // so that `0.5` becomes a float and `0 ` or `0+` end the literal.
+                    buffer.push(first);
+                    self.push_back(c);
+                },
+            }
+        } else {
+            buffer.push(first);
+        }
+
+        loop {
+            let c = match self.next_char() {
+                Some(c) => c,
+                None => break,
+            };
+
+            if c.is_digit(radix) {
+                buffer.push(c);
+            } else if radix == 10 && c == self.lexemes.radix_point {
+                floating = true;
+                buffer.push(c);
+            } else if floating && char_in(&self.lexemes.exponent_chars, c) {
+                buffer.push(c);
+            } else {
+                self.push_back(c);
+                break;
+            }
+        }
+
+        if floating {
+            match buffer.parse::<f32>() {
+                Ok(v) => Ok(Token::Float(v)),
+                Err(err) => Err(ErrorType::new(ErrorKind::BadNumericLiteral(NumericKind::Float, buffer, Some(Box::new(err))))),
+            }
+        } else {
+            // Parse in the literal's own base; `str::parse` would always assume decimal.
+            match isize::from_str_radix(&buffer, radix) {
+                Ok(v) => Ok(Token::Integer(v)),
+                Err(err) => Err(ErrorType::new(ErrorKind::BadNumericLiteral(NumericKind::Integer, buffer, Some(Box::new(err))))),
+            }
+        }
+    }
+
+    /// Reads the remainder of an identifier whose first character is `first`.
+    fn read_ident(&mut self, first: char) -> String {
+        let mut buffer = String::new();
+        buffer.push(first);
+
+        while let Some(c) = self.next_char() {
+            if c.is_xid_continue() {
+                buffer.push(c);
+            } else {
+                self.push_back(c);
+                break;
+            }
+        }
+
+        buffer
+    }
+}
+
+impl<T: Iterator<Item=char>> Iterator for Tokenizer<T> {
+    type Item = Token;
+
+    /// Yields tokens until the end of input.
+    ///
+    /// NOTE: a tokenizer error also ends iteration, and the error itself is discarded.
+    fn next(&mut self) -> Option<Token> {
+        match self.next_token() {
+            Ok(Token::EOF) | Err(_) => None,
+            Ok(token) => Some(token),
+        }
+    }
+}
diff --git a/src/lib.rs b/src/lib.rs
index 4e08724..c0bd79c 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,4 +1,5 @@
#![feature(associated_consts)]
+#![feature(unicode)]
extern crate byteorder;
extern crate rand;
@@ -8,6 +9,7 @@ pub use types::*;
pub mod synth;
pub mod proto;
+pub mod lang;
#[cfg(test)]
mod tests {
diff --git a/src/proto.rs b/src/proto.rs
index c202a10..1af1c72 100644
--- a/src/proto.rs
+++ b/src/proto.rs
@@ -13,7 +13,7 @@ pub enum Command {
Play{sec: u32, usec: u32, freq: u32, amp: f32, voice: u32},
Caps{voices: u32, tp: [u8; 4], ident: [u8; 24]},
PCM{samples: [i16; 16]},
- Unknown{data: [u8; 36]},
+ Unknown{data: [u8; Command::SIZE]},
}
impl Command {
@@ -34,10 +34,10 @@ impl Command {
}
}
-impl<'a> From<&'a [u8; 36]> for Command {
- fn from(packet: &'a [u8; 36]) -> Command {
- let mut fields_u32: [u32; 9] = unsafe { mem::uninitialized() };
- let mut fields_f32: [f32; 9] = unsafe { mem::uninitialized() };
+impl<'a> From<&'a [u8; Command::SIZE]> for Command {
+ fn from(packet: &'a [u8; Command::SIZE]) -> Command {
+ let mut fields_u32: [u32; Command::SIZE / 4] = unsafe { mem::uninitialized() };
+ let mut fields_f32: [f32; Command::SIZE / 4] = unsafe { mem::uninitialized() };
NetworkEndian::read_u32_into(packet, &mut fields_u32);
unsafe { NetworkEndian::read_f32_into_unchecked(packet, &mut fields_f32); }
@@ -73,10 +73,44 @@ impl<'a> From<&'a [u8; 36]> for Command {
Command::PCM{samples: samples}
},
_ => {
- let mut data: [u8; 36] = unsafe { mem::uninitialized() };
+ let mut data: [u8; Command::SIZE] = unsafe { mem::uninitialized() };
data.copy_from_slice(packet);
Command::Unknown{data: data}
}
}
}
}
+
+// Serializes a command into a fixed-size packet buffer.
+impl<'a> From<&'a Command> for [u8; Command::SIZE] {
+ fn from(cmd: &'a Command) -> [u8; Command::SIZE] {
+ // Starts zeroed, so any bytes a variant does not write stay 0.
+ let mut ret: [u8; Command::SIZE] = [0u8; Command::SIZE];
+
+ // Every known variant writes a u32 tag into the first four bytes (0 through 5 below),
+ // followed by its fields.
+ match *cmd {
+ Command::KeepAlive => NetworkEndian::write_u32(&mut ret[..4], 0),
+ Command::Ping{data} => {
+ NetworkEndian::write_u32(&mut ret[..4], 1);
+ // NOTE(review): copy_from_slice panics unless `data` is exactly as long as `ret[4..]`;
+ // the declaration of `Ping` is not visible here -- confirm.
+ (&mut ret[4..]).copy_from_slice(&data);
+ },
+ Command::Quit => NetworkEndian::write_u32(&mut ret[..4], 2),
+ Command::Play{sec, usec, freq, amp, voice} => {
+ NetworkEndian::write_u32_into(&[3u32, sec, usec, freq], &mut ret[..16]);
+ NetworkEndian::write_f32(&mut ret[16..20], amp);
+ NetworkEndian::write_u32(&mut ret[20..24], voice);
+ },
+ Command::Caps{voices, tp, ident} => {
+ NetworkEndian::write_u32_into(&[4u32, voices], &mut ret[..8]);
+ (&mut ret[8..12]).copy_from_slice(&tp);
+ (&mut ret[12..]).copy_from_slice(&ident);
+ },
+ Command::PCM{samples} => {
+ NetworkEndian::write_u32(&mut ret[..4], 5);
+ NetworkEndian::write_i16_into(&samples, &mut ret[4..]);
+ },
+ // An unrecognised packet is emitted exactly as it was stored, tag bytes included.
+ Command::Unknown{data} => {
+ ret.copy_from_slice(&data);
+ },
+ };
+
+ ret
+ }
+}