001    /*
002     * Copyright (c) 2004 World Wide Web Consortium,
003     *
004     * (Massachusetts Institute of Technology, European Research Consortium for
005     * Informatics and Mathematics, Keio University). All Rights Reserved. This
006     * work is distributed under the W3C(r) Software License [1] in the hope that
007     * it will be useful, but WITHOUT ANY WARRANTY; without even the implied
008     * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
009     *
010     * [1] http://www.w3.org/Consortium/Legal/2002/copyright-software-20021231
011     */
012    
013    package org.w3c.dom;
014    
015    /**
016     * The <code>Text</code> interface inherits from <code>CharacterData</code> 
017     * and represents the textual content (termed <a href='http://www.w3.org/TR/2004/REC-xml-20040204#syntax'>character data</a> in XML) of an <code>Element</code> or <code>Attr</code>. If there is no 
018     * markup inside an element's content, the text is contained in a single 
019     * object implementing the <code>Text</code> interface that is the only 
020     * child of the element. If there is markup, it is parsed into the 
021     * information items (elements, comments, etc.) and <code>Text</code> nodes 
022     * that form the list of children of the element.
023     * <p>When a document is first made available via the DOM, there is only one 
024     * <code>Text</code> node for each block of text. Users may create adjacent 
025     * <code>Text</code> nodes that represent the contents of a given element 
026     * without any intervening markup, but should be aware that there is no way 
027     * to represent the separations between these nodes in XML or HTML, so they 
028     * will not (in general) persist between DOM editing sessions. The 
029     * <code>Node.normalize()</code> method merges any such adjacent 
030     * <code>Text</code> objects into a single node for each block of text.
031     * <p> No lexical check is done on the content of a <code>Text</code> node 
032     * and, depending on its position in the document, some characters must be 
033     * escaped during serialization using character references; e.g. the 
034     * characters "&lt;&amp;" if the textual content is part of an element or of 
035     * an attribute, the character sequence "]]&gt;" when part of an element, 
036     * the quotation mark character " or the apostrophe character ' when part of 
037     * an attribute. 
038     * <p>See also the <a href='http://www.w3.org/TR/2004/REC-DOM-Level-3-Core-20040407'>Document Object Model (DOM) Level 3 Core Specification</a>.
039     */
040    public interface Text extends CharacterData {
041        /**
042         * Breaks this node into two nodes at the specified <code>offset</code>, 
043         * keeping both in the tree as siblings. After being split, this node 
044         * will contain all the content up to the <code>offset</code> point. A 
045         * new node of the same type, which contains all the content at and 
046         * after the <code>offset</code> point, is returned. If the original 
047         * node had a parent node, the new node is inserted as the next sibling 
048         * of the original node. When the <code>offset</code> is equal to the 
049         * length of this node, the new node has no data.
050         * @param offset The 16-bit unit offset at which to split, starting from 
051         *   <code>0</code>.
052         * @return The new node, of the same type as this node.
053         * @exception DOMException
054         *   INDEX_SIZE_ERR: Raised if the specified offset is negative or greater 
055         *   than the number of 16-bit units in <code>data</code>.
056         *   <br>NO_MODIFICATION_ALLOWED_ERR: Raised if this node is readonly.
057         */
058        public Text splitText(int offset)
059                              throws DOMException;
060    
061        /**
062         * Returns whether this text node contains <a href='http://www.w3.org/TR/2004/REC-xml-infoset-20040204#infoitem.character'>
063         * element content whitespace</a>, often abusively called "ignorable whitespace". The text node is 
064         * determined to contain whitespace in element content during the load of the 
065         * document or if validation occurs while using <code>Document.normalizeDocument()</code>.
066         * @return <code>true</code> if this text node contains element content whitespace, <code>false</code> otherwise.
067         * @since DOM Level 3
068         */
069        public boolean isElementContentWhitespace();
070    
071        /**
072         * Returns all text of the <code>Text</code> nodes logically-adjacent to 
073         * this node, concatenated in document order.
074         * <br>For instance, in the example this text refers to (not reproduced in this file; presumably a figure in the 
075         * DOM Level 3 Core Specification), <code>wholeText</code> returns "barfoo" on both the "bar" and the "foo" <code>Text</code> nodes.
076         * @return The concatenated text of the <code>Text</code> nodes logically-adjacent to this node, in document order.
077         * @since DOM Level 3
078         */
079        public String getWholeText();
080    
081        /**
082         * Replaces the text of the current node and all logically-adjacent text 
083         * nodes with the specified text. All logically-adjacent text nodes are 
084         * removed including the current node unless it was the recipient of the 
085         * replacement text.
086         * <br>This method returns the node which received the replacement text. 
087         * The returned node is: 
088         * <ul>
089         * <li><code>null</code>, when the replacement text is 
090         * the empty string;
091         * </li>
092         * <li>the current node, except when the current node is 
093         * read-only;
094         * </li>
095         * <li> a new <code>Text</code> node of the same type (
096         * <code>Text</code> or <code>CDATASection</code>) as the current node 
097         * inserted at the location of the replacement.
098         * </li>
099         * </ul>
100         * <br>For instance, in the above example calling 
101         * <code>replaceWholeText</code> on the <code>Text</code> node that 
102         * contains "bar" with "yo" as argument results in the following (NOTE: the example figures this text refers to are not reproduced in this file): 
103         * <br>Where the nodes to be removed are read-only descendants of an 
104         * <code>EntityReference</code>, the <code>EntityReference</code> must 
105         * be removed instead of the read-only nodes. If any 
106         * <code>EntityReference</code> to be removed has descendants that are 
107         * not <code>EntityReference</code>, <code>Text</code>, or 
108         * <code>CDATASection</code> nodes, the <code>replaceWholeText</code> 
109         * method must fail before performing any modification of the document, 
110         * raising a <code>DOMException</code> with the code 
111         * <code>NO_MODIFICATION_ALLOWED_ERR</code>.
112         * <br>For instance, in the example below calling 
113         * <code>replaceWholeText</code> on the <code>Text</code> node that 
114         * contains "bar" fails, because the <code>EntityReference</code> node 
115         * "ent" contains an <code>Element</code> node which cannot be removed.
116         * @param content The content of the replacing <code>Text</code> node.
117         * @return The <code>Text</code> node which received the replacement text, or <code>null</code> when the replacement text is the empty string.
118         * @exception DOMException
119         *   NO_MODIFICATION_ALLOWED_ERR: Raised if one of the <code>Text</code> 
120         *   nodes being replaced is readonly.
121         * @since DOM Level 3
122         */
123        public Text replaceWholeText(String content)
124                                     throws DOMException;
125    
126    }