diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/lang/mod.rs | 12 | ||||
| -rw-r--r-- | src/lang/tokenizer.rs | 395 | ||||
| -rw-r--r-- | src/lib.rs | 2 | ||||
| -rw-r--r-- | src/proto.rs | 46 | 
4 files changed, 449 insertions, 6 deletions
| diff --git a/src/lang/mod.rs b/src/lang/mod.rs new file mode 100644 index 0000000..8256f34 --- /dev/null +++ b/src/lang/mod.rs @@ -0,0 +1,12 @@ +pub mod tokenizer; +pub use self::tokenizer::Tokenizer; + +/* Token kinds emitted by the tokenizer; EOF is returned once the input is exhausted. */ pub enum Token { +    Ident(String), +    Integer(isize), +    Float(f32), +    Oper(char), +    String(String), +    EOF, +} + diff --git a/src/lang/tokenizer.rs b/src/lang/tokenizer.rs new file mode 100644 index 0000000..d1b34e0 --- /dev/null +++ b/src/lang/tokenizer.rs @@ -0,0 +1,395 @@ +use std::collections::HashMap; +use std::error::Error; +use std::fmt; +use super::*; + +/* Characters that drive lexing: the radix point, exponent markers, string delimiters, the escape introducer with its hex and octal selectors, the two comment delimiter characters, and a table of single-character escapes. */ pub struct Lexemes { +    radix_point: char, +    exponent_chars: String, +    string_delim: String, +    esc_intro: char, +    esc_hex: char, +    esc_oct: char, +    com_outer: char, +    com_inner: char, +    escapes: HashMap<char, char> +} + +impl Default for Lexemes { +    /* Defaults: dot radix point, e or E exponents, single or double quote string delimiters, backslash escape introducer with x and o selecting hex and octal escapes, slash and star comment delimiters, and escapes for n, t, r and both quote chars. */ fn default() -> Lexemes { +        let mut ret = Lexemes { +            radix_point: '.', +            exponent_chars: "eE".to_string(), +            string_delim: "'\"".to_string(), +            esc_intro: '\\', +            esc_hex: 'x', +            esc_oct: 'o', +            com_outer: '/', +            com_inner: '*', +            escapes: HashMap::new(), +        }; + +        ret.escapes.insert('n', '\n'); +        ret.escapes.insert('t', '\t'); +        ret.escapes.insert('r', '\r'); +        ret.escapes.insert('"', '"'); +        ret.escapes.insert('\'', '\''); + +        ret +    } +} + +/* Where in the input an unexpected EOF was encountered. */ #[derive(Debug)] +pub enum Location { +    InString, +    InStringEscape, +} + +/* Which numeric string escape failed to decode. */ #[derive(Debug)] +pub enum EscapeKind { +    Hexadecimal, +    Octal, +} + +/* Which numeric literal form failed to parse. */ #[derive(Debug)] +pub enum NumericKind { +    Integer, +    Float, +} + +/* Failure categories. BadEscapeValue and BadNumericLiteral carry the offending text and, when one exists, the underlying parse error. */ #[derive(Debug)] +pub enum ErrorKind { +    UnexpectedEOF(Location), +    BadEscapeValue(EscapeKind, String, Option<Box<Error>>), +    BadNumericLiteral(NumericKind, String, Option<Box<Error>>), +    UnknownChar(char), +} + +/* Tokenizer error: the kind plus a description string stored at construction time. */ #[derive(Debug)] +pub struct ErrorType { +    pub kind: ErrorKind, +    
desc: String, +} + +impl ErrorType { +    /* Builds an error whose description is generated from kind. */ pub fn new(kind: ErrorKind) -> ErrorType { +        let mut ret = ErrorType { +            kind: kind, +            desc: "".to_string(), +        }; + +        ret.desc = match &ret.kind { +            &ErrorKind::UnexpectedEOF(ref loc) => format!("Unexpected EOF {}", match loc { +                &Location::InString => "in string constant", +                &Location::InStringEscape => "in string escape", +            }), +            &ErrorKind::BadEscapeValue(ref kind, ref val, ref err) => format!("Bad {} escape {}: {:?}", match kind { +                &EscapeKind::Hexadecimal => "hexadecimal", +                &EscapeKind::Octal => "octal", +            }, val, err), +            &ErrorKind::BadNumericLiteral(ref kind, ref val, ref err) => format!("Bad {} literal {}: {:?}", match kind { +                &NumericKind::Integer => "integer", +                &NumericKind::Float => "floating point", +            }, val, err), +            &ErrorKind::UnknownChar(c) => format!("Unknown character {}", c), +        }; + +        ret +    } + +    /* Builds an error with a caller-supplied description instead of the generated one. */ pub fn with_description(kind: ErrorKind, description: String) -> ErrorType { +        ErrorType { +            kind: kind, +            desc: description, +        } +    } +} + +impl Error for ErrorType { +    /* Returns the stored description string. */ fn description<'a>(&'a self) -> &'a str { +        &self.desc +    } + +    /* Returns the boxed underlying error for BadEscapeValue and BadNumericLiteral when one was recorded, otherwise None. */ fn cause(&self) -> Option<&Error> { +        match &self.kind { +            &ErrorKind::UnexpectedEOF(_) => None, +            &ErrorKind::BadEscapeValue(_, _, ref err) => match err { +                &Some(ref err) => Some(&**err), +                &None => None, +            }, +            &ErrorKind::BadNumericLiteral(_, _, ref err) => match err { +                &Some(ref err) => Some(&**err), +                &None => None, +            }, +            &ErrorKind::UnknownChar(_) => None, +        } +    } +} + +impl fmt::Display for ErrorType { +    /* Writes the same text that description() returns. */ fn fmt(&self, f: &mut fmt::Formatter) -> 
Result<(), fmt::Error> { +        write!(f, "{}", self.description()) +    } +} + +// NB: linear in size of set. This is practically fine for very small sets, but shouldn't be used +// otherwise. +/* True when c occurs among the characters of s. */ fn char_in(s: &str, c: char) -> bool { +    s.chars().find(|&x| x == c).map_or(false, |_| true) +} + +/* Pulls chars from reader and groups them into Tokens. pushback holds at most one char that was read ahead and handed back. */ pub struct Tokenizer<T: Iterator<Item=char>> { +    reader: T, +    pushback: Option<char>, +    lexemes: Lexemes, +} + +impl<T: Iterator<Item=char>> Tokenizer<T> { +    /* Wraps reader with an empty pushback slot and the default Lexemes. */ pub fn new(reader: T) -> Tokenizer<T> { +        Tokenizer { +            reader: reader, +            pushback: None, +            lexemes: Default::default(), +        } +    } + +    /* Stores c so the next next_char() call returns it. Returns false, discarding c, if a char is already pending. Callers in this file ignore the result. */ fn push_back(&mut self, c: char) -> bool { +        match self.pushback { +            None => { +                self.pushback = Some(c); +                true +            }, +            Some(_) => false, +        } +    } + +    /* Yields the pending pushed-back char if there is one, otherwise the next char from reader. */ fn next_char(&mut self) -> Option<char> { +        match self.pushback { +            Some(c) => { +                self.pushback = None; +                Some(c) +            }, +            None => self.reader.next(), +        } +    } + +    /* Skips whitespace, then lexes one item: a comment (skipped, then lexing restarts), a string, a number, an identifier, or a single-char operator. Returns Token::EOF when the input ends. */ fn next_token(&mut self) -> Result<Token, ErrorType> { +        let mut c = self.next_char(); +        if c == None { +            return Ok(Token::EOF); +        } +        let mut cc = c.unwrap(); + +        while cc.is_whitespace() { +            c = self.next_char(); +            if c == None { +                return Ok(Token::EOF); +            } +            cc = c.unwrap(); +        } + +        /* Comments */ /* A lone com_outer char is returned as an operator. An unterminated comment yields Token::EOF rather than an error. NOTE(review): after a com_inner char the following char is consumed even when it is not com_outer, so a comment ending in two stars and a slash does not appear to be closed - confirm. */ +        if cc == self.lexemes.com_outer { +            let nc = self.next_char(); +            if nc == None { +                return Ok(Token::Oper(cc)); +            } +            let ncc = nc.unwrap(); +            if ncc == self.lexemes.com_inner { +                loop { +                    match self.next_char() { +                        None => return Ok(Token::EOF), +                        Some(x) if x == 
self.lexemes.com_inner => match self.next_char() { +                            None => return Ok(Token::EOF), +                            Some(x) if x == self.lexemes.com_outer => return self.next_token(), +                            Some(_) => continue, +                        }, +                        Some(_) => continue, +                    } +                } +            } else { +                self.push_back(ncc); +                return Ok(Token::Oper(cc)); +            } +        } + +        /* Strings */ /* The string ends at the same delimiter char that opened it. Hex and octal escapes read digits up to the first non-digit, which is pushed back. An escape char missing from the escapes table stands for itself. */ +        if char_in(&self.lexemes.string_delim, cc) { +            let mut buffer = String::new(); + +            loop { +                let nc = self.next_char(); +                if nc == None { +                    return Err(ErrorType::new(ErrorKind::UnexpectedEOF(Location::InString))); +                } +                let ncc = nc.unwrap(); +                if ncc == self.lexemes.esc_intro { +                    let ec = self.next_char(); +                    if ec == None { +                        return Err(ErrorType::new(ErrorKind::UnexpectedEOF(Location::InStringEscape))); +                    } +                    let ecc = ec.unwrap(); + +                    if ecc == self.lexemes.esc_hex { +                        let mut value = String::new(); +                        loop { +                            let sc = self.next_char(); +                            if None == sc { +                                return Err(ErrorType::new(ErrorKind::UnexpectedEOF(Location::InStringEscape))); +                            } +                            let scc = sc.unwrap(); + +                            if scc.is_digit(16) { +                                value.push(scc); +                            } else { +                                self.push_back(scc); +                                break; +                            } +                        } +                        let rc = u32::from_str_radix(&value, 16); +       
                 if let Err(err) = rc { +                            return Err(ErrorType::new(ErrorKind::BadEscapeValue(EscapeKind::Hexadecimal, value, Some(Box::new(err))))); +                        } +                        let rc = ::std::char::from_u32(rc.unwrap()); +                        match rc { +                            Some(rcc) => buffer.push(rcc), +                            None => return Err(ErrorType::new(ErrorKind::BadEscapeValue(EscapeKind::Hexadecimal, value, None))), +                        } +                        continue; +                    } + +                    if ecc == self.lexemes.esc_oct { +                        let mut value = String::new(); +                        loop { +                            let sc = self.next_char(); +                            if None == sc { +                                return Err(ErrorType::new(ErrorKind::UnexpectedEOF(Location::InStringEscape))); +                            } +                            let scc = sc.unwrap(); + +                            if scc.is_digit(8) { +                                value.push(scc); +                            } else { +                                self.push_back(scc); +                                break; +                            } +                        } +                        let rc = u32::from_str_radix(&value, 8); +                        if let Err(err) = rc { +                            return Err(ErrorType::new(ErrorKind::BadEscapeValue(EscapeKind::Octal, value, Some(Box::new(err))))); +                        } +                        let rc = ::std::char::from_u32(rc.unwrap()); +                        match rc { +                            Some(rcc) => buffer.push(rcc), +                            None => return Err(ErrorType::new(ErrorKind::BadEscapeValue(EscapeKind::Octal, value, None))), +                        } +                        continue; +                    } + +                    
buffer.push(*self.lexemes.escapes.get(&ecc).unwrap_or(&ecc)); +                    continue; +                } + +                if ncc == cc { +                    return Ok(Token::String(buffer)); +                } + +                buffer.push(ncc); +            } +        } + +        /* Numeric constants */ /* NOTE(review): buffer starts empty and cc is only pushed in the leading-zero branch, so a nonzero first digit looks like it is dropped before parsing - confirm. NOTE(review): the char after a leading zero is pushed without being checked. NOTE(review): radix only filters which digits are accepted; the final parse calls do not take it, so hex and octal literals appear to be parsed as decimal - confirm. */ +        if cc.is_digit(10) { +            let mut radix = 10; +            let mut buffer = String::new(); +            let mut floating = false; + +            if cc == '0' { +                let nc = self.next_char(); +                if nc == None { +                    return Ok(Token::Integer(0)); +                } +                let ncc = nc.unwrap(); + +                if ncc == self.lexemes.esc_hex { +                    radix = 16; +                } else if ncc == self.lexemes.esc_oct { +                    radix = 8; +                } else { +                    buffer.push(cc); +                    buffer.push(ncc); +                } +            } + +            loop { +                let dc = self.next_char(); +                if dc == None { +                    break; +                } +                let dcc = dc.unwrap(); + +                if dcc.is_digit(radix) { +                    buffer.push(dcc); +                } else if dcc == self.lexemes.radix_point { +                    floating = true; +                    buffer.push(dcc); +                } else if floating && char_in(&self.lexemes.exponent_chars, dcc) { +                    buffer.push(dcc); +                } else { +                    self.push_back(dcc); +                    break; +                } +            } + +            return if floating { +                match buffer.parse::<f32>() { +                    Ok(v) => Ok(Token::Float(v)), +                    Err(err) => Err(ErrorType::new(ErrorKind::BadNumericLiteral(NumericKind::Float, buffer, Some(Box::new(err))))), +                } +            } else { +                match 
buffer.parse::<isize>() { +                    Ok(v) => Ok(Token::Integer(v)), +                    Err(err) => Err(ErrorType::new(ErrorKind::BadNumericLiteral(NumericKind::Integer, buffer, Some(Box::new(err))))), +                } +            }; +        } + +        /* Identifiers */ /* One is_xid_start char followed by any number of is_xid_continue chars; the first non-matching char is pushed back. */ +        if cc.is_xid_start() { +            let mut buffer = String::new(); +            buffer.push(cc); + +            loop { +                let nc = self.next_char(); +                if nc == None { +                    return Ok(Token::Ident(buffer)); +                } +                let ncc = nc.unwrap(); + +                if ncc.is_xid_continue() { +                    buffer.push(ncc); +                } else { +                    self.push_back(ncc); +                    break; +                } +            } + +            return Ok(Token::Ident(buffer)); +        } + +        /* Everything else */ +        return Ok(Token::Oper(cc)); +    } +} + +impl<T: Iterator<Item=char>> Iterator for Tokenizer<T> { +    type Item = Token; + +    /* Yields tokens until EOF. An Err from next_token also ends iteration and the error value is discarded, so callers cannot tell a lexing failure from end of input. */ fn next(&mut self) -> Option<Token> { +        match self.next_token() { +            Err(_) => None, +            Ok(Token::EOF) => None, +            Ok(t) => Some(t), +        } +    } +} @@ -1,4 +1,5 @@  #![feature(associated_consts)] +#![feature(unicode)]  extern crate byteorder;  extern crate rand; @@ -8,6 +9,7 @@ pub use types::*;  pub mod synth;  pub mod proto; +pub mod lang;  #[cfg(test)]  mod tests { diff --git a/src/proto.rs b/src/proto.rs index c202a10..1af1c72 100644 --- a/src/proto.rs +++ b/src/proto.rs @@ -13,7 +13,7 @@ pub enum Command {      Play{sec: u32, usec: u32, freq: u32, amp: f32, voice: u32},      Caps{voices: u32, tp: [u8; 4], ident: [u8; 24]},      PCM{samples: [i16; 16]}, -    Unknown{data: [u8; 36]}, +    Unknown{data: [u8; Command::SIZE]},  }  impl Command { @@ -34,10 +34,10 @@ impl Command {      }  } -impl<'a> From<&'a [u8; 36]> for Command { -    fn from(packet: &'a [u8; 36]) -> Command { -        
let mut fields_u32: [u32; 9] = unsafe { mem::uninitialized() }; -        let mut fields_f32: [f32; 9] = unsafe { mem::uninitialized() }; +impl<'a> From<&'a [u8; Command::SIZE]> for Command { +    fn from(packet: &'a [u8; Command::SIZE]) -> Command { +        /* NOTE(review): mem::uninitialized is deprecated in current Rust in favour of MaybeUninit - consider migrating. */ let mut fields_u32: [u32; Command::SIZE / 4] = unsafe { mem::uninitialized() }; +        let mut fields_f32: [f32; Command::SIZE / 4] = unsafe { mem::uninitialized() };          NetworkEndian::read_u32_into(packet, &mut fields_u32);          unsafe { NetworkEndian::read_f32_into_unchecked(packet, &mut fields_f32); } @@ -73,10 +73,44 @@ impl<'a> From<&'a [u8; 36]> for Command {                  Command::PCM{samples: samples}              },              _ => { -                let mut data: [u8; 36] = unsafe { mem::uninitialized() }; +                let mut data: [u8; Command::SIZE] = unsafe { mem::uninitialized() };                  data.copy_from_slice(packet);                  Command::Unknown{data: data}              }          }      }  } + +/* Serialises a Command into a zero-initialised packet of Command::SIZE bytes. The leading network-endian u32 is 0 for KeepAlive, 1 for Ping, 2 for Quit, 3 for Play, 4 for Caps and 5 for PCM; bytes a variant does not write stay zero. Unknown is written back as its raw data bytes. */ impl<'a> From<&'a Command> for [u8; Command::SIZE] { +    fn from(cmd: &'a Command) -> [u8; Command::SIZE] { +        let mut ret: [u8; Command::SIZE] = [0u8; Command::SIZE]; + +        match *cmd { +            Command::KeepAlive => NetworkEndian::write_u32(&mut ret[..4], 0), +            Command::Ping{data} => { +                NetworkEndian::write_u32(&mut ret[..4], 1); +                (&mut ret[4..]).copy_from_slice(&data); +            }, +            Command::Quit => NetworkEndian::write_u32(&mut ret[..4], 2), +            Command::Play{sec, usec, freq, amp, voice} => { +                NetworkEndian::write_u32_into(&[3u32, sec, usec, freq], &mut ret[..16]); +                NetworkEndian::write_f32(&mut ret[16..20], amp); +                NetworkEndian::write_u32(&mut ret[20..24], voice); +            }, +            Command::Caps{voices, tp, ident} => { +                NetworkEndian::write_u32_into(&[4u32, voices], &mut ret[..8]); +                (&mut 
ret[8..12]).copy_from_slice(&tp); +                (&mut ret[12..]).copy_from_slice(&ident); +            }, +            Command::PCM{samples} => { +                NetworkEndian::write_u32(&mut ret[..4], 5); +                NetworkEndian::write_i16_into(&samples, &mut ret[4..]); +            }, +            Command::Unknown{data} => { +                ret.copy_from_slice(&data); +            }, +        }; + +        ret +    } +} | 
