diff options
author | Graham Northup <grissess@nexusg.org> | 2017-09-21 16:31:40 -0400 |
---|---|---|
committer | Graham Northup <grissess@nexusg.org> | 2017-09-21 16:31:40 -0400 |
commit | 3d370b9a980d88f884ddd87b62bc785c3b963e1d (patch) | |
tree | 2f35167101c09891ed48bc8cbf3603e993ec7341 | |
parent | dcfc4e82386f41bd36c3b102512bd225fc5331b6 (diff) |
Tokenizer for DSL
-rw-r--r-- | src/lang/mod.rs | 12 | ||||
-rw-r--r-- | src/lang/tokenizer.rs | 395 | ||||
-rw-r--r-- | src/lib.rs | 2 | ||||
-rw-r--r-- | src/proto.rs | 46 |
4 files changed, 449 insertions, 6 deletions
diff --git a/src/lang/mod.rs b/src/lang/mod.rs new file mode 100644 index 0000000..8256f34 --- /dev/null +++ b/src/lang/mod.rs @@ -0,0 +1,12 @@ +pub mod tokenizer; +pub use self::tokenizer::Tokenizer; + +pub enum Token { + Ident(String), + Integer(isize), + Float(f32), + Oper(char), + String(String), + EOF, +} + diff --git a/src/lang/tokenizer.rs b/src/lang/tokenizer.rs new file mode 100644 index 0000000..d1b34e0 --- /dev/null +++ b/src/lang/tokenizer.rs @@ -0,0 +1,395 @@ +use std::collections::HashMap; +use std::error::Error; +use std::fmt; +use super::*; + +pub struct Lexemes { + radix_point: char, + exponent_chars: String, + string_delim: String, + esc_intro: char, + esc_hex: char, + esc_oct: char, + com_outer: char, + com_inner: char, + escapes: HashMap<char, char> +} + +impl Default for Lexemes { + fn default() -> Lexemes { + let mut ret = Lexemes { + radix_point: '.', + exponent_chars: "eE".to_string(), + string_delim: "'\"".to_string(), + esc_intro: '\\', + esc_hex: 'x', + esc_oct: 'o', + com_outer: '/', + com_inner: '*', + escapes: HashMap::new(), + }; + + ret.escapes.insert('n', '\n'); + ret.escapes.insert('t', '\t'); + ret.escapes.insert('r', '\r'); + ret.escapes.insert('"', '"'); + ret.escapes.insert('\'', '\''); + + ret + } +} + +#[derive(Debug)] +pub enum Location { + InString, + InStringEscape, +} + +#[derive(Debug)] +pub enum EscapeKind { + Hexadecimal, + Octal, +} + +#[derive(Debug)] +pub enum NumericKind { + Integer, + Float, +} + +#[derive(Debug)] +pub enum ErrorKind { + UnexpectedEOF(Location), + BadEscapeValue(EscapeKind, String, Option<Box<Error>>), + BadNumericLiteral(NumericKind, String, Option<Box<Error>>), + UnknownChar(char), +} + +#[derive(Debug)] +pub struct ErrorType { + pub kind: ErrorKind, + desc: String, +} + +impl ErrorType { + pub fn new(kind: ErrorKind) -> ErrorType { + let mut ret = ErrorType { + kind: kind, + desc: "".to_string(), + }; + + ret.desc = match &ret.kind { + &ErrorKind::UnexpectedEOF(ref loc) => format!("Unexpected EOF {}", match loc { + &Location::InString => "in string constant", + &Location::InStringEscape => "in string escape", + }), + &ErrorKind::BadEscapeValue(ref kind, ref val, ref err) => format!("Bad {} escape {}: {:?}", match kind { + &EscapeKind::Hexadecimal => "hexadecimal", + &EscapeKind::Octal => "octal", + }, val, err), + &ErrorKind::BadNumericLiteral(ref kind, ref val, ref err) => format!("Bad {} literal {}: {:?}", match kind { + &NumericKind::Integer => "integer", + &NumericKind::Float => "floating point", + }, val, err), + &ErrorKind::UnknownChar(c) => format!("Unknown character {}", c), + }; + + ret + } + + pub fn with_description(kind: ErrorKind, description: String) -> ErrorType { + ErrorType { + kind: kind, + desc: description, + } + } +} + +impl Error for ErrorType { + fn description<'a>(&'a self) -> &'a str { + &self.desc + } + + fn cause(&self) -> Option<&Error> { + match &self.kind { + &ErrorKind::UnexpectedEOF(_) => None, + &ErrorKind::BadEscapeValue(_, _, ref err) => match err { + &Some(ref err) => Some(&**err), + &None => None, + }, + &ErrorKind::BadNumericLiteral(_, _, ref err) => match err { + &Some(ref err) => Some(&**err), + &None => None, + }, + &ErrorKind::UnknownChar(_) => None, + } + } +} + +impl fmt::Display for ErrorType { + fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { + write!(f, "{}", self.description()) + } +} + +// NB: linear in size of set. This is practically fine for very small sets, but shouldn't be used +// otherwise. +fn char_in(s: &str, c: char) -> bool { + s.chars().find(|&x| x == c).map_or(false, |_| true) +} + +pub struct Tokenizer<T: Iterator<Item=char>> { + reader: T, + pushback: Option<char>, + lexemes: Lexemes, +} + +impl<T: Iterator<Item=char>> Tokenizer<T> { + pub fn new(reader: T) -> Tokenizer<T> { + Tokenizer { + reader: reader, + pushback: None, + lexemes: Default::default(), + } + } + + fn push_back(&mut self, c: char) -> bool { + match self.pushback { + None => { + self.pushback = Some(c); + true + }, + Some(_) => false, + } + } + + fn next_char(&mut self) -> Option<char> { + match self.pushback { + Some(c) => { + self.pushback = None; + Some(c) + }, + None => self.reader.next(), + } + } + + fn next_token(&mut self) -> Result<Token, ErrorType> { + let mut c = self.next_char(); + if c == None { + return Ok(Token::EOF); + } + let mut cc = c.unwrap(); + + while cc.is_whitespace() { + c = self.next_char(); + if c == None { + return Ok(Token::EOF); + } + cc = c.unwrap(); + } + + /* Comments */ + if cc == self.lexemes.com_outer { + let nc = self.next_char(); + if nc == None { + return Ok(Token::Oper(cc)); + } + let ncc = nc.unwrap(); + if ncc == self.lexemes.com_inner { + loop { + match self.next_char() { + None => return Ok(Token::EOF), + Some(x) if x == self.lexemes.com_inner => match self.next_char() { + None => return Ok(Token::EOF), + Some(x) if x == self.lexemes.com_outer => return self.next_token(), + Some(_) => continue, + }, + Some(_) => continue, + } + } + } else { + self.push_back(ncc); + return Ok(Token::Oper(cc)); + } + } + + /* Strings */ + if char_in(&self.lexemes.string_delim, cc) { + let mut buffer = String::new(); + + loop { + let nc = self.next_char(); + if nc == None { + return Err(ErrorType::new(ErrorKind::UnexpectedEOF(Location::InString))); + } + let ncc = nc.unwrap(); + if ncc == self.lexemes.esc_intro { + let ec = self.next_char(); + if ec == None { + return Err(ErrorType::new(ErrorKind::UnexpectedEOF(Location::InStringEscape))); + } + let ecc = ec.unwrap(); + + if ecc == self.lexemes.esc_hex { + let mut value = String::new(); + loop { + let sc = self.next_char(); + if None == sc { + return Err(ErrorType::new(ErrorKind::UnexpectedEOF(Location::InStringEscape))); + } + let scc = sc.unwrap(); + + if scc.is_digit(16) { + value.push(scc); + } else { + self.push_back(scc); + break; + } + } + let rc = u32::from_str_radix(&value, 16); + if let Err(err) = rc { + return Err(ErrorType::new(ErrorKind::BadEscapeValue(EscapeKind::Hexadecimal, value, Some(Box::new(err))))); + } + let rc = ::std::char::from_u32(rc.unwrap()); + match rc { + Some(rcc) => buffer.push(rcc), + None => return Err(ErrorType::new(ErrorKind::BadEscapeValue(EscapeKind::Hexadecimal, value, None))), + } + continue; + } + + if ecc == self.lexemes.esc_oct { + let mut value = String::new(); + loop { + let sc = self.next_char(); + if None == sc { + return Err(ErrorType::new(ErrorKind::UnexpectedEOF(Location::InStringEscape))); + } + let scc = sc.unwrap(); + + if scc.is_digit(8) { + value.push(scc); + } else { + self.push_back(scc); + break; + } + } + let rc = u32::from_str_radix(&value, 8); + if let Err(err) = rc { + return Err(ErrorType::new(ErrorKind::BadEscapeValue(EscapeKind::Octal, value, Some(Box::new(err))))); + } + let rc = ::std::char::from_u32(rc.unwrap()); + match rc { + Some(rcc) => buffer.push(rcc), + None => return Err(ErrorType::new(ErrorKind::BadEscapeValue(EscapeKind::Octal, value, None))), + } + continue; + } + + buffer.push(*self.lexemes.escapes.get(&ecc).unwrap_or(&ecc)); + continue; + } + + if ncc == cc { + return Ok(Token::String(buffer)); + } + + buffer.push(ncc); + } + } + + /* Numeric constants */ + if cc.is_digit(10) { + let mut radix = 10; + let mut buffer = String::new(); + let mut floating = false; + + if cc == '0' { + let nc = self.next_char(); + if nc == None { + return Ok(Token::Integer(0)); + } + let ncc = nc.unwrap(); + + if ncc == self.lexemes.esc_hex { + radix = 16; + } else if ncc == self.lexemes.esc_oct { + radix = 8; + } else { + buffer.push(cc); + buffer.push(ncc); + } + } + + loop { + let dc = self.next_char(); + if dc == None { + break; + } + let dcc = dc.unwrap(); + + if dcc.is_digit(radix) { + buffer.push(dcc); + } else if dcc == self.lexemes.radix_point { + floating = true; + buffer.push(dcc); + } else if floating && char_in(&self.lexemes.exponent_chars, dcc) { + buffer.push(dcc); + } else { + self.push_back(dcc); + break; + } + } + + return if floating { + match buffer.parse::<f32>() { + Ok(v) => Ok(Token::Float(v)), + Err(err) => Err(ErrorType::new(ErrorKind::BadNumericLiteral(NumericKind::Float, buffer, Some(Box::new(err))))), + } + } else { + match buffer.parse::<isize>() { + Ok(v) => Ok(Token::Integer(v)), + Err(err) => Err(ErrorType::new(ErrorKind::BadNumericLiteral(NumericKind::Integer, buffer, Some(Box::new(err))))), + } + }; + } + + /* Identifiers */ + if cc.is_xid_start() { + let mut buffer = String::new(); + buffer.push(cc); + + loop { + let nc = self.next_char(); + if nc == None { + return Ok(Token::Ident(buffer)); + } + let ncc = nc.unwrap(); + + if ncc.is_xid_continue() { + buffer.push(ncc); + } else { + self.push_back(ncc); + break; + } + } + + return Ok(Token::Ident(buffer)); + } + + /* Everything else */ + return Ok(Token::Oper(cc)); + } +} + +impl<T: Iterator<Item=char>> Iterator for Tokenizer<T> { + type Item = Token; + + fn next(&mut self) -> Option<Token> { + match self.next_token() { + Err(_) => None, + Ok(Token::EOF) => None, + Ok(t) => Some(t), + } + } +} @@ -1,4 +1,5 @@ #![feature(associated_consts)] +#![feature(unicode)] extern crate byteorder; extern crate rand; @@ -8,6 +9,7 @@ pub use types::*; pub mod synth; pub mod proto; +pub mod lang; #[cfg(test)] mod tests { diff --git a/src/proto.rs b/src/proto.rs index c202a10..1af1c72 100644 --- a/src/proto.rs +++ b/src/proto.rs @@ -13,7 +13,7 @@ pub enum Command { Play{sec: u32, usec: u32, freq: u32, amp: f32, voice: u32}, Caps{voices: u32, tp: [u8; 4], ident: [u8; 24]}, PCM{samples: [i16; 16]}, - Unknown{data: [u8; 36]}, + Unknown{data: [u8; Command::SIZE]}, } impl Command { @@ -34,10 +34,10 @@ impl Command { } } -impl<'a> From<&'a [u8; 36]> for Command { - fn from(packet: &'a [u8; 36]) -> Command { - let mut fields_u32: [u32; 9] = unsafe { mem::uninitialized() }; - let mut fields_f32: [f32; 9] = unsafe { mem::uninitialized() }; +impl<'a> From<&'a [u8; Command::SIZE]> for Command { + fn from(packet: &'a [u8; Command::SIZE]) -> Command { + let mut fields_u32: [u32; Command::SIZE / 4] = unsafe { mem::uninitialized() }; + let mut fields_f32: [f32; Command::SIZE / 4] = unsafe { mem::uninitialized() }; NetworkEndian::read_u32_into(packet, &mut fields_u32); unsafe { NetworkEndian::read_f32_into_unchecked(packet, &mut fields_f32); } @@ -73,10 +73,44 @@ impl<'a> From<&'a [u8; 36]> for Command { Command::PCM{samples: samples} }, _ => { - let mut data: [u8; 36] = unsafe { mem::uninitialized() }; + let mut data: [u8; Command::SIZE] = unsafe { mem::uninitialized() }; data.copy_from_slice(packet); Command::Unknown{data: data} } } } } + +impl<'a> From<&'a Command> for [u8; Command::SIZE] { + fn from(cmd: &'a Command) -> [u8; Command::SIZE] { + let mut ret: [u8; Command::SIZE] = [0u8; Command::SIZE]; + + match *cmd { + Command::KeepAlive => NetworkEndian::write_u32(&mut ret[..4], 0), + Command::Ping{data} => { + NetworkEndian::write_u32(&mut ret[..4], 1); + (&mut ret[4..]).copy_from_slice(&data); + }, + Command::Quit => NetworkEndian::write_u32(&mut ret[..4], 2), + Command::Play{sec, usec, freq, amp, voice} => { + NetworkEndian::write_u32_into(&[3u32, sec, usec, freq], &mut ret[..16]); + NetworkEndian::write_f32(&mut ret[16..20], amp); + NetworkEndian::write_u32(&mut ret[20..24], voice); + }, + Command::Caps{voices, tp, ident} => { + NetworkEndian::write_u32_into(&[4u32, voices], &mut ret[..8]); + (&mut ret[8..12]).copy_from_slice(&tp); + (&mut ret[12..]).copy_from_slice(&ident); + }, + Command::PCM{samples} => { + NetworkEndian::write_u32(&mut ret[..4], 5); + NetworkEndian::write_i16_into(&samples, &mut ret[4..]); + }, + Command::Unknown{data} => { + ret.copy_from_slice(&data); + }, + }; + + ret + } +} |