1use std::io::{Write, Read, Error, ErrorKind};
2use std::io;
3use std::fmt;
4
5pub fn write_char<W: Write>(writer: &mut W, c: char) -> io::Result<()> {
6 let mut buf = [0u8;4];
7 let utf8 = encode_char_utf8(c, &mut buf);
8 writer.write_all(utf8)
9}
10
/// Encodes `c` as UTF-8 into the front of `buf` and returns the slice of
/// `buf` that holds the encoded bytes (one to four bytes long).
///
/// # Panics
///
/// Panics if `buf` is shorter than the encoded length of `c`.
fn encode_char_utf8<'a>(c: char, buf: &'a mut [u8]) -> &'a [u8] {
    let code = c as u32;

    // (encoded length, tag bits of the lead byte, payload mask of the lead byte)
    let (len, lead_tag, lead_mask): (usize, u8, u32) = if code <= 0x7f {
        (1, 0b0000_0000, 0b0111_1111)
    } else if code <= 0x7ff {
        (2, 0b1100_0000, 0b0001_1111)
    } else if code <= 0xffff {
        (3, 0b1110_0000, 0b0000_1111)
    } else {
        (4, 0b1111_0000, 0b0000_0111)
    };

    // Continuation bytes are filled from the last one backwards; each takes
    // the low six bits of what is left of the code point.
    let mut rest = code;
    for idx in (1..len).rev() {
        buf[idx] = 0b1000_0000 | (rest & 0b0011_1111) as u8;
        rest >>= 6;
    }

    // Whatever remains after the continuation bytes goes into the lead byte.
    buf[0] = lead_tag | (rest & lead_mask) as u8;
    &buf[..len]
}
33
/// Returns the total length in bytes (1 to 4) of the UTF-8 sequence that
/// starts with `first`, or 0 if `first` cannot start a well-formed sequence.
///
/// Besides continuation bytes (`0x80..=0xBF`), the bytes `0xC0`, `0xC1` and
/// `0xF5..=0xFF` are rejected: the first two could only begin overlong
/// two-byte encodings, and the rest would encode values above U+10FFFF.
/// Reporting 0 for them lets a caller fail immediately instead of first
/// consuming the bytes that follow.
fn utf8_char_bytes(first: u8) -> usize {
    if first < 0x80 {
        1
    } else if first >= 0xc2 && first <= 0xdf {
        2
    } else if first >= 0xe0 && first <= 0xef {
        3
    } else if first >= 0xf0 && first <= 0xf4 {
        4
    } else {
        0
    }
}
47
48
/// Iterator adapter that wraps a reader and yields the `char`s decoded from
/// its bytes.
pub struct Chars<R> {
    /// The wrapped reader that supplies the raw bytes.
    inner: R,
}
57
58pub fn chars<R: Read>(reader: R) -> Chars<R> {
59 Chars { inner: reader }
60}
61
/// Errors that can be produced while iterating over a [`Chars`].
#[derive(Debug)]
pub enum CharsError {
    /// The bytes read did not form valid UTF-8.
    NotUtf8,
    /// The underlying reader reported an I/O error.
    Other(Error),
}
71
72impl<R: Read> Iterator for Chars<R> {
73 type Item = Result<char, CharsError>;
74
75 fn next(&mut self) -> Option<Result<char, CharsError>> {
76 let first_byte = match read_a_byte(&mut self.inner) {
77 None => return None,
78 Some(Ok(b)) => b,
79 Some(Err(e)) => return Some(Err(CharsError::Other(e))),
80 };
81 let width = utf8_char_bytes(first_byte);
82 if width == 1 { return Some(Ok(first_byte as char)) }
83 if width == 0 { return Some(Err(CharsError::NotUtf8)) }
84 let mut buf = [first_byte, 0, 0, 0];
85 {
86 let mut start = 1;
87 while start < width {
88 match self.inner.read(&mut buf[start..width]) {
89 Ok(0) => return Some(Err(CharsError::NotUtf8)),
90 Ok(n) => start += n,
91 Err(ref e) if e.kind() == ErrorKind::Interrupted => continue,
92 Err(e) => return Some(Err(CharsError::Other(e))),
93 }
94 }
95 }
96 Some(match ::std::str::from_utf8(&buf[..width]).ok() {
97 Some(s) => Ok(s.chars().next().unwrap()),
98 None => Err(CharsError::NotUtf8),
99 })
100 }
101}
102
/// Reads exactly one byte from `reader`, retrying reads that fail with
/// `ErrorKind::Interrupted`.
///
/// Returns `None` when the reader reports end of input (a zero-length read),
/// `Some(Ok(byte))` on success, and `Some(Err(_))` for any other I/O error.
fn read_a_byte<R: Read>(reader: &mut R) -> Option<io::Result<u8>> {
    let mut byte = [0u8; 1];
    loop {
        match reader.read(&mut byte) {
            Ok(0) => return None,
            Ok(_) => return Some(Ok(byte[0])),
            Err(err) => {
                if err.kind() != ErrorKind::Interrupted {
                    return Some(Err(err));
                }
                // Interrupted: fall through and try the read again.
            }
        }
    }
}
114
115impl fmt::Display for CharsError {
116 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
117 match *self {
118 CharsError::NotUtf8 => {
119 "byte stream did not contain valid utf8".fmt(f)
120 }
121 CharsError::Other(ref e) => e.fmt(f),
122 }
123 }
124}
125
#[cfg(test)]
mod test {

    use super::encode_char_utf8;

    /// Encodes `c` into a scratch buffer and checks the bytes produced.
    fn do_test_encode_char_utf8(c: char, expected: &[u8]) {
        let mut scratch = [0u8; 4];
        assert_eq!(encode_char_utf8(c, &mut scratch), expected);
    }

    /// One sample character per encoded length, from one byte up to four.
    #[test]
    fn test_encode_char_utf8() {
        let cases: [(char, &[u8]); 4] = [
            ('$', &[0x24]),
            ('¢', &[0xc2, 0xa2]),
            ('€', &[0xe2, 0x82, 0xac]),
            ('\u{10348}', &[0xf0, 0x90, 0x8d, 0x88]),
        ];
        for &(c, expected) in cases.iter() {
            do_test_encode_char_utf8(c, expected);
        }
    }
}