htmlescape/
encode.rs

1use std::io::{self, Write};
2use std::char;
3use io_support::{write_char};
4use entities::*;
5
6///
7/// HTML entity-encode a string.
8///
9/// Entity-encodes a string with a minimal set of entities:
10///
11/// - `" -- "`
12/// - `& -- &`
13/// - `' -- '`
14/// - `< -- &lt;`
15/// - `> -- &gt;`
16///
17/// # Arguments
18/// - `s` - The string to encode.
19///
20/// # Return value
21/// The encoded string.
22///
23/// # Example
24/// ~~~
25/// let encoded = htmlescape::encode_minimal("<em>Hej!</em>");
26/// assert_eq!(&encoded, "&lt;em&gt;Hej!&lt;/em&gt;");
27/// ~~~
28///
29/// # Safety notes
30/// Using the function to encode an untrusted string that is to be used as a HTML attribute value
31/// may lead to XSS vulnerabilities. Consider the following example:
32///
33/// ~~~
34/// let name = "dummy onmouseover=alert(/XSS/)";    // User input
35/// let tag = format!("<option value={}>", htmlescape::encode_minimal(name));
36/// // Here `tag` is    "<option value=dummy onmouseover=alert(/XSS/)>"
37/// ~~~
38///
39/// Use `escape_attribute` for escaping HTML attribute values.
40pub fn encode_minimal(s: &str) -> String {
41    let mut writer = Vec::with_capacity((s.len()/3 + 1) * 4);
42    match encode_minimal_w(s, &mut writer) {
43        Err(_) => panic!(),
44        Ok(_) => String::from_utf8(writer).expect("impossible invalid UTF-8 in output")
45    }
46}
47
48///
49/// HTML entity-encode a string to a writer.
50///
51/// Similar to `encode_minimal`, except that the output is written to a writer rather
52/// than returned as a `String`.
53///
54/// # Arguments
55/// - `s` - The string to encode.
56/// - `writer` - Output is written to here.
57pub fn encode_minimal_w<W: Write>(s: &str, writer: &mut W) -> io::Result<()> {
58    for c in s.chars() {
59        match get_entity(c) {
60            None => try!(write_char(writer, c)),
61            Some(entity) => try!(writer.write_all(entity.as_bytes()))
62        }
63    }
64    Ok(())
65}
66
67///
68/// HTML entity-encodes a string for use in attributes values.
69///
70/// Entity-encodes a string using an extensive set of entities, giving a string suitable for use
71/// in HTML attribute values. All entities from `encode_minimal` are used, and further, all
72/// non-alphanumeric ASCII characters are hex-encoded (`&#x__;`).
73/// See the [OWASP XSS Prevention Cheat Sheet](
74/// https://www.owasp.org/index.php/XSS_(Cross_Site_Scripting)_Prevention_Cheat_Sheet) for more
75/// information on entity-encoding for attribute values.
76///
77/// # Arguments
78/// - `s` - The string to encode.
79///
80/// # Return value
81/// The encoded string.
82///
83/// # Example
84/// ~~~
85/// let encoded = htmlescape::encode_attribute("\"No\", he said.");
86/// assert_eq!(&encoded, "&quot;No&quot;&#x2C;&#x20;he&#x20;said&#x2E;");
87/// ~~~
88pub fn encode_attribute(s: &str) -> String {
89    let mut writer = Vec::with_capacity(s.len() * 3);
90    match encode_attribute_w(s, &mut writer) {
91        Err(_) => panic!(),
92        Ok(_) => String::from_utf8(writer).unwrap()
93    }
94}
95
96///
97/// HTML entity-encodes a string, for use in attributes values, to a writer.
98///
99/// Similar to `encode_attribute`, except that the output is written to a writer rather
100/// than returned as a `String`.
101///
102/// # Arguments
103/// - `s` - The string to encode.
104/// - `writer` - Output is written to here.
105pub fn encode_attribute_w<W: Write>(s: &str, writer: &mut W) -> io::Result<()> {
106    for c in s.chars() {
107        let b = c as usize;
108        let res = match get_entity(c) {
109            Some(entity) => writer.write_all(entity.as_bytes()),
110            None =>
111                if b < 256 && (b > 127 || !is_ascii_alnum(c)) {
112                    write_hex(writer, c)
113                } else {
114                    write_char(writer, c)
115                }
116        };
117        try!(res);
118    }
119    Ok(())
120}
121
122fn get_entity(c: char) -> Option<&'static str> {
123    match MINIMAL_ENTITIES.binary_search_by(|&(ec, _)| ec.cmp(&c) ) {
124        Err(..) => None,
125        Ok(idx) => {
126            let (_, e) = MINIMAL_ENTITIES[idx];
127            Some(e)
128        }
129    }
130}
131
/// Writes `c` to `writer` as a two-digit, upper-case hexadecimal character
/// reference of the form `&#xHH;`.
///
/// Only the low eight bits of `c` are encoded, so the result is only meaningful
/// for characters below U+0100; callers are expected to check that first.
///
/// The whole reference is emitted with a single `write_all`, so a writer that
/// accepts fewer bytes per call than requested cannot truncate the output.
///
/// # Errors
/// Returns any error reported by `writer`.
fn write_hex<W: Write>(writer: &mut W, c: char) -> io::Result<()> {
    let hex = b"0123456789ABCDEF";
    // Intentional truncation to one byte: see the note on the supported range above.
    let n = c as u8;
    let bytes = [
        b'&',
        b'#',
        b'x',
        hex[(n >> 4) as usize],
        hex[(n & 0x0F) as usize],
        b';',
    ];
    writer.write_all(&bytes)
}
141
/// Returns `true` when `c` is an ASCII letter (`a`-`z`, `A`-`Z`) or an ASCII
/// digit (`0`-`9`), and `false` for every other character.
fn is_ascii_alnum(c: char) -> bool {
    c.is_ascii_alphanumeric()
}
145