htmlescape/encode.rs
1use std::io::{self, Write};
2use std::char;
3use io_support::{write_char};
4use entities::*;
5
6///
7/// HTML entity-encode a string.
8///
9/// Entity-encodes a string with a minimal set of entities:
10///
11/// - `" -- "`
12/// - `& -- &`
13/// - `' -- '`
14/// - `< -- <`
15/// - `> -- >`
16///
17/// # Arguments
18/// - `s` - The string to encode.
19///
20/// # Return value
21/// The encoded string.
22///
23/// # Example
24/// ~~~
25/// let encoded = htmlescape::encode_minimal("<em>Hej!</em>");
26/// assert_eq!(&encoded, "<em>Hej!</em>");
27/// ~~~
28///
29/// # Safety notes
30/// Using the function to encode an untrusted string that is to be used as a HTML attribute value
31/// may lead to XSS vulnerabilities. Consider the following example:
32///
33/// ~~~
34/// let name = "dummy onmouseover=alert(/XSS/)"; // User input
35/// let tag = format!("<option value={}>", htmlescape::encode_minimal(name));
36/// // Here `tag` is "<option value=dummy onmouseover=alert(/XSS/)>"
37/// ~~~
38///
39/// Use `escape_attribute` for escaping HTML attribute values.
40pub fn encode_minimal(s: &str) -> String {
41 let mut writer = Vec::with_capacity((s.len()/3 + 1) * 4);
42 match encode_minimal_w(s, &mut writer) {
43 Err(_) => panic!(),
44 Ok(_) => String::from_utf8(writer).expect("impossible invalid UTF-8 in output")
45 }
46}
47
48///
49/// HTML entity-encode a string to a writer.
50///
51/// Similar to `encode_minimal`, except that the output is written to a writer rather
52/// than returned as a `String`.
53///
54/// # Arguments
55/// - `s` - The string to encode.
56/// - `writer` - Output is written to here.
57pub fn encode_minimal_w<W: Write>(s: &str, writer: &mut W) -> io::Result<()> {
58 for c in s.chars() {
59 match get_entity(c) {
60 None => try!(write_char(writer, c)),
61 Some(entity) => try!(writer.write_all(entity.as_bytes()))
62 }
63 }
64 Ok(())
65}
66
67///
68/// HTML entity-encodes a string for use in attributes values.
69///
70/// Entity-encodes a string using an extensive set of entities, giving a string suitable for use
71/// in HTML attribute values. All entities from `encode_minimal` are used, and further, all
72/// non-alphanumeric ASCII characters are hex-encoded (`&#x__;`).
73/// See the [OWASP XSS Prevention Cheat Sheet](
74/// https://www.owasp.org/index.php/XSS_(Cross_Site_Scripting)_Prevention_Cheat_Sheet) for more
75/// information on entity-encoding for attribute values.
76///
77/// # Arguments
78/// - `s` - The string to encode.
79///
80/// # Return value
81/// The encoded string.
82///
83/// # Example
84/// ~~~
85/// let encoded = htmlescape::encode_attribute("\"No\", he said.");
86/// assert_eq!(&encoded, ""No", he said.");
87/// ~~~
88pub fn encode_attribute(s: &str) -> String {
89 let mut writer = Vec::with_capacity(s.len() * 3);
90 match encode_attribute_w(s, &mut writer) {
91 Err(_) => panic!(),
92 Ok(_) => String::from_utf8(writer).unwrap()
93 }
94}
95
96///
97/// HTML entity-encodes a string, for use in attributes values, to a writer.
98///
99/// Similar to `encode_attribute`, except that the output is written to a writer rather
100/// than returned as a `String`.
101///
102/// # Arguments
103/// - `s` - The string to encode.
104/// - `writer` - Output is written to here.
105pub fn encode_attribute_w<W: Write>(s: &str, writer: &mut W) -> io::Result<()> {
106 for c in s.chars() {
107 let b = c as usize;
108 let res = match get_entity(c) {
109 Some(entity) => writer.write_all(entity.as_bytes()),
110 None =>
111 if b < 256 && (b > 127 || !is_ascii_alnum(c)) {
112 write_hex(writer, c)
113 } else {
114 write_char(writer, c)
115 }
116 };
117 try!(res);
118 }
119 Ok(())
120}
121
122fn get_entity(c: char) -> Option<&'static str> {
123 match MINIMAL_ENTITIES.binary_search_by(|&(ec, _)| ec.cmp(&c) ) {
124 Err(..) => None,
125 Ok(idx) => {
126 let (_, e) = MINIMAL_ENTITIES[idx];
127 Some(e)
128 }
129 }
130}
131
/// Writes `c` as a two-digit, upper-case hexadecimal character reference (`&#xHH;`).
///
/// Only the low eight bits of `c` are encoded, so the output is meaningful only for
/// code points below 256; callers are expected to check that before calling.
///
/// The whole reference is emitted with a single `write_all`, so a writer that accepts
/// fewer bytes than requested per `write` call cannot truncate the output.
///
/// # Errors
/// Returns any error reported by `writer`.
fn write_hex<W: Write>(writer: &mut W, c: char) -> io::Result<()> {
    const HEX_DIGITS: &[u8; 16] = b"0123456789ABCDEF";

    // Deliberate truncation: see the note on code points above.
    let n = c as u8;
    let reference = [
        b'&',
        b'#',
        b'x',
        HEX_DIGITS[(n >> 4) as usize],
        HEX_DIGITS[(n & 0x0F) as usize],
        b';',
    ];
    writer.write_all(&reference)
}
141
/// Returns `true` when `c` is an ASCII letter (`a`-`z`, `A`-`Z`) or an ASCII digit (`0`-`9`).
fn is_ascii_alnum(c: char) -> bool {
    c.is_ascii_alphanumeric()
}
145